oweller2 committed
Commit a05f134
Parent: 84cec5b

switch back

Files changed (2):
  1. config.json +3 -3
  2. modeling_flexbert.py +1 -0
config.json CHANGED
@@ -69,9 +69,9 @@
   "num_attention_heads": 12,
   "num_hidden_layers": 22,
   "num_initial_layers": 1,
- "pad_logits": false,
+ "pad_logits": true,
   "pad_token_id": 0,
- "padding": "padded",
+ "padding": "unpadded",
   "pooling_type": "cls",
   "position_embedding_type": "absolute",
   "rotary_emb_base": 10000.0,
@@ -82,7 +82,7 @@
   "sliding_window": 128,
   "transformers_version": "4.44.1",
   "type_vocab_size": 2,
- "unpad_embeddings": false,
+ "unpad_embeddings": true,
   "use_cache": true,
   "use_fa2": true,
   "use_sdpa_attn_mask": false,
modeling_flexbert.py CHANGED
@@ -1650,6 +1650,7 @@ class FlexBertForCausalLM(FlexBertPreTrainedModel):
     input_ids, attention_mask, position_ids, labels
 )
 
+
 hidden_states = self.bert(
     input_ids,
     attention_mask=None,