oweller2 committed
Commit 9f10682
1 Parent(s): 4904e15

add unpad back in with created attn_mask

Files changed:
- config.json +1 -1
- modeling_flexbert.py +2 -0
config.json CHANGED
@@ -82,7 +82,7 @@
   "sliding_window": 128,
   "transformers_version": "4.44.1",
   "type_vocab_size": 2,
-  "unpad_embeddings":
+  "unpad_embeddings": true,
   "use_cache": true,
   "use_fa2": true,
   "use_sdpa_attn_mask": false,
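With this change, downstream loaders should see the flag enabled. A minimal check, assuming the config is loaded through Hugging Face transformers; the repo path below is a placeholder, not the actual model id:

from transformers import AutoConfig

# Placeholder repo path; substitute the model repo that contains this config.json.
cfg = AutoConfig.from_pretrained("path/to/model-repo", trust_remote_code=True)
print(cfg.unpad_embeddings)  # expected to print True after this commit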
modeling_flexbert.py CHANGED
@@ -1643,6 +1643,8 @@ class FlexBertForCausalLM(FlexBertPreTrainedModel):
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
         if self.unpad_embeddings and (indices is None and cu_seqlens is None and max_seqlen is None):
             batch_size, seq_len = input_ids.shape[:2]
+            if attention_mask is None:  # Create causal mask (lower triangular)
+                attention_mask = torch.tril(torch.ones(batch_size, seq_len), diagonal=0)
             input_ids, indices, cu_seqlens, max_seqlen, position_ids, labels = self.unpad_inputs(
                 input_ids, attention_mask, position_ids, labels
             )
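For context, the unpad path flattens the padded (batch, seq_len) tensors into one packed token stream plus the indices / cu_seqlens / max_seqlen bookkeeping that flash-attn-style kernels expect. Below is a minimal sketch of that mechanic, assuming the flash-attn packing convention; unpad_sketch is an illustrative stand-in, not the module's actual unpad_inputs:

import torch
import torch.nn.functional as F

def unpad_sketch(input_ids: torch.Tensor, attention_mask: torch.Tensor):
    # input_ids: (batch, seq_len); attention_mask: (batch, seq_len), nonzero = keep token.
    seqlens = attention_mask.sum(dim=-1, dtype=torch.int32)        # tokens kept per row
    indices = torch.nonzero(attention_mask.flatten()).flatten()    # flat positions of kept tokens
    cu_seqlens = F.pad(torch.cumsum(seqlens, dim=0, dtype=torch.int32), (1, 0))  # [0, n0, n0+n1, ...]
    max_seqlen = int(seqlens.max())
    unpadded_ids = input_ids.flatten()[indices]                    # packed (total_tokens,) stream
    return unpadded_ids, indices, cu_seqlens, max_seqlen

# Example with a plain padding mask (1 = real token, 0 = padding).
input_ids = torch.tensor([[5, 6, 7, 0], [8, 9, 0, 0]])
attention_mask = torch.tensor([[1, 1, 1, 0], [1, 1, 0, 0]])
ids, indices, cu_seqlens, max_seqlen = unpad_sketch(input_ids, attention_mask)
# ids -> tensor([5, 6, 7, 8, 9]); cu_seqlens -> tensor([0, 3, 5]); max_seqlen -> 3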