oweller2
committed on
Commit
•
46797c8
1
Parent(s):
81b671b
no pad at inference
Browse files- modeling_flexbert.py +1 -3
modeling_flexbert.py
CHANGED
@@ -1721,14 +1721,12 @@ class FlexBertForCausalLM(FlexBertPreTrainedModel):
|
|
1721 |
# only last token for inputs if past is defined
|
1722 |
if past_key_values is not None:
|
1723 |
input_ids = input_ids[:, -1].unsqueeze(-1)
|
1724 |
-
if attention_mask is not None:
|
1725 |
-
attention_mask = attention_mask[:, -1:]
|
1726 |
|
1727 |
return {
|
1728 |
"input_ids": input_ids,
|
1729 |
"past_key_values": past_key_values,
|
1730 |
"use_cache": kwargs.get("use_cache", True),
|
1731 |
-
"attention_mask":
|
1732 |
}
|
1733 |
|
1734 |
def get_number_parameters(self, count_embeddings: bool = True, trainable: bool = True) -> int:
|
|
|
1721 |
# only last token for inputs if past is defined
|
1722 |
if past_key_values is not None:
|
1723 |
input_ids = input_ids[:, -1].unsqueeze(-1)
|
|
|
|
|
1724 |
|
1725 |
return {
|
1726 |
"input_ids": input_ids,
|
1727 |
"past_key_values": past_key_values,
|
1728 |
"use_cache": kwargs.get("use_cache", True),
|
1729 |
+
"attention_mask": None,
|
1730 |
}
|
1731 |
|
1732 |
def get_number_parameters(self, count_embeddings: bool = True, trainable: bool = True) -> int:
|