oweller2 committed on
Commit
0b90701
1 Parent(s): 9e4ff15
Files changed (1)
  1. modeling_flexbert.py +5 -0
modeling_flexbert.py CHANGED
@@ -1721,6 +1721,11 @@ class FlexBertForCausalLM(FlexBertPreTrainedModel):
     ) -> dict:
         if attention_mask is None:
             attention_mask = torch.ones_like(input_ids)
+
+        # Calculate positions before unpadding
+        if position_ids is None:
+            position_ids = attention_mask.long().cumsum(-1) - 1
+            position_ids.masked_fill_(attention_mask == 0, 1)
 
         batch_size, seq_len = input_ids.shape[:2]
         input_ids, indices, cu_seqlens, max_seqlen, position_ids, _ = self.unpad_inputs(
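
For context, a minimal standalone sketch (not part of the commit) of what the added position-id calculation produces for a padded batch; the example tensors and printed values are illustrative assumptions, not taken from the repository.

import torch

# Hypothetical attention mask: one full-length sequence, one padded with two tokens.
attention_mask = torch.tensor(
    [[1, 1, 1, 1],
     [1, 1, 0, 0]]
)

# Same logic as the added lines: cumulative positions over real tokens,
# with padded positions filled with a placeholder value of 1.
position_ids = attention_mask.long().cumsum(-1) - 1
position_ids.masked_fill_(attention_mask == 0, 1)

print(position_ids)
# tensor([[0, 1, 2, 3],
#         [0, 1, 1, 1]])

Computing the position ids from the attention mask before unpadding means each real token keeps a position relative to the start of its own sequence, regardless of how the batch is later flattened.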