oweller2 committed
Commit 11a83af
Parent(s): f5a1962

fix

Files changed:
- modeling_flexbert.py +9 -12
- pytorch_model.bin +1 -1
modeling_flexbert.py CHANGED

@@ -1666,42 +1666,39 @@ class FlexBertForCausalLM(FlexBertPreTrainedModel):
         loss = None
         if labels is not None:
             if cu_seqlens is not None:
-                shift_labels =
-
+                shift_labels = input_ids[1:].clone()
+                loss_logits = logits[:-1]  # Only shift for loss
 
                 # Mask boundaries, so eos doesn't predict bos
                 for i in range(len(cu_seqlens) - 1):
                     boundary_pos = cu_seqlens[i+1] - 1
-
+                    if boundary_pos < len(shift_labels):
+                        shift_labels[boundary_pos] = -100
 
                 # NOTE: no padding or mask in there for now
                 assert 50283 not in shift_labels, f"PAD token found in shift_labels: {shift_labels}"
                 assert 50284 not in shift_labels, f"MASK token found in shift_labels: {shift_labels}"
-                assert shift_labels.shape ==
-
+                assert shift_labels.shape[0] == loss_logits.shape[0]  # Verify shapes align
             else:
                 # Padded case: simple shift
                 shift_labels = input_ids[..., 1:].contiguous()
-
+                loss_logits = logits[..., :-1, :].contiguous()
                 # mask out PAD tokens in the shift_labels
                 mask = (shift_labels == 50283)
                 shift_labels = torch.where(mask, torch.tensor(-100, device=shift_labels.device), shift_labels)
-                assert shift_labels.shape ==
+                assert shift_labels.shape[0] == loss_logits.shape[0]  # Verify shapes align
 
             # For both cases, we'll use the shifted input_ids as our labels
             labels = shift_labels
 
             # Flatten the tokens
-            loss = self.loss_fn(
-                logits.view(-1, logits.size(-1)),
-                shift_labels.view(-1)
-            )
+            loss = self.loss_fn(loss_logits.view(-1, loss_logits.size(-1)), shift_labels.view(-1))
 
         if self.pad_logits:
             return CausalLMOutput(
                 loss=loss,
                 logits=self.pad_inputs(logits, indices, batch_size, seq_len)[0],
-                hidden_states=
+                hidden_states=hidden_states,
                 attentions=None,
             )
         else:
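For readers following the change: in the unpadded (cu_seqlens) branch, the labels are now built once by shifting input_ids left and the logits are trimmed to match, and the last position of every packed sequence is set to -100 so the eos of one document is never trained to predict the bos of the next. The sketch below is a minimal, standalone illustration of that branch, not the model code itself: it assumes loss_fn is a CrossEntropyLoss with ignore_index=-100 (not shown in the diff), and the token ids, vocabulary size, and cu_seqlens values are toy examples.

import torch

# Toy setup: two sequences of lengths 3 and 4, packed into one unpadded batch.
vocab_size = 8
input_ids = torch.tensor([5, 6, 7, 1, 2, 3, 4])
cu_seqlens = torch.tensor([0, 3, 7])              # cumulative sequence boundaries
logits = torch.randn(len(input_ids), vocab_size)  # stand-in for the model output

shift_labels = input_ids[1:].clone()  # position t is trained to predict token t+1
loss_logits = logits[:-1]             # the final position has no next token

# Mask boundaries, so eos doesn't predict the bos of the next packed sequence
for i in range(len(cu_seqlens) - 1):
    boundary_pos = cu_seqlens[i + 1] - 1
    if boundary_pos < len(shift_labels):
        shift_labels[boundary_pos] = -100

assert shift_labels.shape[0] == loss_logits.shape[0]  # shapes align after the shift

# Assumption: the model's loss_fn ignores -100 labels, as CrossEntropyLoss does by default.
loss_fn = torch.nn.CrossEntropyLoss(ignore_index=-100)
loss = loss_fn(loss_logits.view(-1, loss_logits.size(-1)), shift_labels.view(-1))
print(shift_labels.tolist(), float(loss))

With cu_seqlens = [0, 3, 7], shift_labels comes out as [6, 7, -100, 2, 3, 4]: position 2 is the boundary between the two packed sequences, so its label is ignored by the loss, which is exactly what the boundary-masking loop in the diff accomplishes.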
pytorch_model.bin CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:823aa77eddb7f9291beddc92b7d093b34962c129c0f6d674b4390f4f54441081
 size 598685038