feat: added return_dict
modeling_bert.py CHANGED (+4, -0)
@@ -379,6 +379,7 @@ class BertModel(BertPreTrainedModel):
         task_type_ids=None,
         attention_mask=None,
         masked_tokens_mask=None,
+        return_dict=True,
     ):
         """If masked_tokens_mask is not None (i.e. last_layer_subset == True in BertForPreTraining),
         we only want the output for the masked tokens. This means that we only compute the last
@@ -429,6 +430,9 @@ class BertModel(BertPreTrainedModel):
         sequence_output = sequence_output[masked_tokens_mask[subset_mask]]
         pooled_output = self.pooler(pool_input, pool=False) if self.pooler is not None else None
 
+        if not return_dict:
+            return (sequence_output, pooled_output)
+
         return BaseModelOutputWithPoolingAndCrossAttentions(
             last_hidden_state=sequence_output,
             pooler_output=pooled_output,
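
For context, a minimal usage sketch of the new flag. This is illustrative only: "model", "input_ids", and "attention_mask" stand in for an already-constructed BertModel and its inputs, which are not part of this diff.

# Illustrative sketch only -- model construction and input preparation
# are assumed and omitted here.

# Default behavior (return_dict=True): the forward pass returns a
# BaseModelOutputWithPoolingAndCrossAttentions dataclass.
out = model(input_ids, attention_mask=attention_mask)
sequence_output = out.last_hidden_state
pooled_output = out.pooler_output

# With return_dict=False, the same call now returns a plain tuple instead:
sequence_output, pooled_output = model(
    input_ids, attention_mask=attention_mask, return_dict=False
)

Since the flag defaults to True, existing callers that expect the dataclass output are unaffected by this change.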