oweller2 committed
Commit e9e8f85
1 Parent(s): 46797c8
Files changed (2)
  1. config.json +3 -3
  2. modeling_flexbert.py +3 -2
config.json CHANGED
@@ -69,9 +69,9 @@
   "num_attention_heads": 12,
   "num_hidden_layers": 22,
   "num_initial_layers": 1,
-  "pad_logits": false,
+  "pad_logits": true,
   "pad_token_id": 50283,
-  "padding": "padded",
+  "padding": "unpadded",
   "pooling_type": "cls",
   "position_embedding_type": "absolute",
   "rotary_emb_base": 10000.0,
@@ -82,7 +82,7 @@
   "sliding_window": 128,
   "transformers_version": "4.44.1",
   "type_vocab_size": 2,
-  "unpad_embeddings": false,
+  "unpad_embeddings": true,
   "use_cache": true,
   "use_fa2": true,
   "use_sdpa_attn_mask": false,
modeling_flexbert.py CHANGED
@@ -935,6 +935,7 @@ class FlexBertModel(FlexBertPreTrainedModel):
         else:
             self.final_norm = None
         self.unpad_embeddings = config.unpad_embeddings
+        self.is_decoder = config.causal_mask

     def post_init(self):
         self._init_weights(reset_params=False)
@@ -956,7 +957,7 @@ class FlexBertModel(FlexBertPreTrainedModel):
         max_seqlen: Optional[int] = None,
         **kwargs,
     ) -> Tuple[Union[List[torch.Tensor], torch.Tensor], Optional[torch.Tensor]]:
-        if attention_mask is None:
+        if attention_mask is None and not self.is_decoder:
             attention_mask = torch.ones_like(input_ids)

         embedding_output = self.embeddings(input_ids, position_ids)
@@ -1529,7 +1530,7 @@ class FlexBertForCausalLM(FlexBertPreTrainedModel):
         self.unpad_embeddings = config.unpad_embeddings
         self.pad_logits = config.pad_logits
         self.compile_model = config.compile_model
-        # self.masked_prediction = config.masked_prediction
+        self.masked_prediction = config.masked_prediction

         # Initialize weights and apply final processing
         self._init_weights(reset_params=False)
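Two behavioral notes on the Python side of this commit: FlexBertModel now only fabricates an all-ones attention mask when it is not configured as a decoder (config.causal_mask), and FlexBertForCausalLM now actually reads config.masked_prediction instead of leaving it commented out. A common use of such a flag is to run the LM head only on positions whose labels are not -100, skipping most of the vocabulary-projection compute; the sketch below shows that general pattern and is an assumption about intent, not this repo's actual helper.

import torch
import torch.nn as nn

def lm_head_loss(hidden: torch.Tensor,
                 labels: torch.Tensor,
                 head: nn.Module,
                 masked_prediction: bool) -> torch.Tensor:
    # hidden: (total_tokens, dim) in the unpadded layout; labels: (total_tokens,)
    if masked_prediction:
        # Project to the vocabulary only where a label exists, so ignored
        # tokens never pass through the (large) LM head.
        keep = labels != -100
        logits = head(hidden[keep])
        return nn.functional.cross_entropy(logits, labels[keep])
    logits = head(hidden)
    return nn.functional.cross_entropy(logits, labels, ignore_index=-100)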