oweller2 committed · Commit a05f134 · Parent(s): 84cec5b

switch back

Files changed:
- config.json (+3 -3)
- modeling_flexbert.py (+1 -0)
config.json
CHANGED
@@ -69,9 +69,9 @@
     "num_attention_heads": 12,
     "num_hidden_layers": 22,
     "num_initial_layers": 1,
-    "pad_logits":
+    "pad_logits": true,
     "pad_token_id": 0,
-    "padding": "
+    "padding": "unpadded",
     "pooling_type": "cls",
     "position_embedding_type": "absolute",
     "rotary_emb_base": 10000.0,
@@ -82,7 +82,7 @@
     "sliding_window": 128,
     "transformers_version": "4.44.1",
     "type_vocab_size": 2,
-    "unpad_embeddings":
+    "unpad_embeddings": true,
     "use_cache": true,
     "use_fa2": true,
     "use_sdpa_attn_mask": false,
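The three flipped values switch FlexBERT back to its unpadded-token path: with "padding": "unpadded" and "unpad_embeddings": true, pad tokens are stripped before the encoder runs, and "pad_logits": true scatters the outputs back into a regular (batch, seq_len) tensor at the end. A minimal sketch of that round trip, assuming flash-attn-style unpad/pad helpers; the function names are illustrative, not the exact FlexBERT internals:

```python
import torch
import torch.nn.functional as F

def unpad_input(hidden_states: torch.Tensor, attention_mask: torch.Tensor):
    """Drop pad positions, packing (batch, seq, dim) -> (total_tokens, dim)."""
    seqlens = attention_mask.sum(dim=-1, dtype=torch.int32)
    indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
    # cu_seqlens marks where each sequence starts/ends in the packed tensor;
    # flash-attention kernels consume this instead of a dense mask.
    cu_seqlens = F.pad(torch.cumsum(seqlens, dim=0, dtype=torch.int32), (1, 0))
    return hidden_states.flatten(0, 1)[indices], indices, cu_seqlens, int(seqlens.max())

def pad_output(packed: torch.Tensor, indices: torch.Tensor, batch_size: int, seqlen: int):
    """Inverse of unpad_input: scatter packed rows back to (batch, seq, dim)."""
    out = torch.zeros(batch_size * seqlen, packed.shape[-1],
                      dtype=packed.dtype, device=packed.device)
    out[indices] = packed
    return out.view(batch_size, seqlen, -1)

x = torch.randn(2, 4, 8)                           # batch of 2, seq len 4
mask = torch.tensor([[1, 1, 1, 0], [1, 1, 0, 0]])  # 3 + 2 real tokens
packed, idx, cu, _ = unpad_input(x, mask)          # packed: (5, 8), cu: [0, 3, 5]
restored = pad_output(packed, idx, 2, 4)           # zeros where pads were
```

The payoff is that attention runs only over real tokens, so mixed-length batches waste no compute on padding.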
modeling_flexbert.py
CHANGED
@@ -1650,6 +1650,7 @@ class FlexBertForCausalLM(FlexBertPreTrainedModel):
             input_ids, attention_mask, position_ids, labels
         )

+
         hidden_states = self.bert(
             input_ids,
             attention_mask=None,
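The touched call is part of the same unpadded flow: self.bert receives attention_mask=None because the mask has already been consumed upstream when the inputs were unpadded, so the backbone operates on the packed token stream rather than masking pads a second time. To load the checkpoint with the updated config, a usage sketch; the repo id is a placeholder, and the auto_map wiring that lets trust_remote_code resolve the custom FlexBERT classes is an assumption, not shown in this commit:

```python
# Usage sketch. "user/flexbert-checkpoint" is a placeholder repo id, and the
# auto_map wiring to FlexBertForCausalLM is assumed, not shown in this commit.
from transformers import AutoConfig, AutoModelForCausalLM

config = AutoConfig.from_pretrained("user/flexbert-checkpoint", trust_remote_code=True)
print(config.padding, config.unpad_embeddings, config.pad_logits)  # unpadded True True
model = AutoModelForCausalLM.from_pretrained(
    "user/flexbert-checkpoint", trust_remote_code=True
)
```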