biggunnyso4 committed
Commit: 3fc3c42
1 Parent(s): ac5cfbf

Update modeling.py
modeling.py: +13 -11

modeling.py CHANGED
@@ -355,6 +355,7 @@ class NewEmbeddings(nn.Module):
                     attention_mask[i, l:] = 0
 
             # Set attention_mask_bool for unpadding
+            unpad_inputs = False
             if unpad_inputs:
                 attention_mask_bool = attention_mask.bool()
                 if length is None:
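The gate above is the entry to the unpadding path: when unpad_inputs is true, attention_mask_bool is used to drop padding tokens before attention, and this commit forces the flag to False so the padded layout is always kept. Purely as an illustrative sketch of the general unpadding idea (not this repository's implementation; all names and shapes below are made up):

import torch

# Toy tensors; in the real model `hidden` would be the embedding output.
hidden = torch.randn(2, 5, 8)                        # [batch, seq_len, dim]
attention_mask = torch.tensor([[1, 1, 1, 0, 0],
                               [1, 1, 1, 1, 0]])
attention_mask_bool = attention_mask.bool()

packed = hidden[attention_mask_bool]                 # [num_real_tokens, dim], pads removed
# ... attention would run over `packed`, skipping work on padding positions ...

restored = torch.zeros_like(hidden)                  # pad positions stay zero
restored[attention_mask_bool] = packed               # scatter back to the padded layout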
@@ -861,7 +862,8 @@ class NewModel(NewPreTrainedModel):
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
         )
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
-        unpad_inputs = unpad_inputs if unpad_inputs is not None else self.config.unpad_inputs
+        # unpad_inputs = unpad_inputs if unpad_inputs is not None else self.config.unpad_inputs
+        unpad_inputs = False
         output_padded = length is None
 
         if input_ids is not None and inputs_embeds is not None:
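Before this change, NewModel.forward resolved the flag with the usual argument-overrides-config idiom; the commented-out line keeps that original behaviour visible while the flag is now pinned off. A minimal sketch of the difference, with made-up values rather than anything from the repo:

def resolve_unpad(call_arg, config_default):
    # Pre-commit behaviour: an explicit call argument wins, otherwise fall back to the config.
    return call_arg if call_arg is not None else config_default

print(resolve_unpad(None, True))    # True  -> unpadding would run
print(resolve_unpad(False, True))   # False -> the caller opted out

# Post-commit behaviour: the resolution above is skipped entirely.
unpad_inputs = False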
@@ -1006,7 +1008,7 @@ class NewForMaskedLM(NewPreTrainedModel):
         """
 
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
-
+        unpad_inputs = False
         if labels is None or not self.new.config.unpad_inputs:
             length = None
             subset_indices = None
@@ -1027,7 +1029,7 @@ class NewForMaskedLM(NewPreTrainedModel):
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            unpad_inputs=
+            unpad_inputs=unpad_inputs,
         )
 
         sequence_output = outputs[0]
@@ -1092,7 +1094,7 @@ class NewForSequenceClassification(NewPreTrainedModel):
             `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
         """
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
-
+        unpad_inputs = False
         outputs = self.new(
             input_ids,
             attention_mask=attention_mask,
@@ -1103,7 +1105,7 @@ class NewForSequenceClassification(NewPreTrainedModel):
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            unpad_inputs=
+            unpad_inputs=unpad_inputs,
         )
 
         pooled_output = outputs[1]
@@ -1182,7 +1184,7 @@ class NewForMultipleChoice(NewPreTrainedModel):
         """
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
         num_choices = input_ids.shape[1] if input_ids is not None else inputs_embeds.shape[1]
-
+        unpad_inputs = False
         input_ids = input_ids.view(-1, input_ids.size(-1)) if input_ids is not None else None
         attention_mask = attention_mask.view(-1, attention_mask.size(-1)) if attention_mask is not None else None
         token_type_ids = token_type_ids.view(-1, token_type_ids.size(-1)) if token_type_ids is not None else None
@@ -1203,7 +1205,7 @@ class NewForMultipleChoice(NewPreTrainedModel):
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            unpad_inputs=
+            unpad_inputs=unpad_inputs,
         )
 
         pooled_output = outputs[1]
@@ -1263,7 +1265,7 @@ class NewForTokenClassification(NewPreTrainedModel):
             Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
         """
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
-
+        unpad_inputs = False
         outputs = self.new(
             input_ids,
             attention_mask=attention_mask,
@@ -1274,7 +1276,7 @@ class NewForTokenClassification(NewPreTrainedModel):
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            unpad_inputs=
+            unpad_inputs=unpad_inputs,
         )
 
         sequence_output = outputs[0]
@@ -1336,7 +1338,7 @@ class NewForQuestionAnswering(NewPreTrainedModel):
             are not taken into account for computing the loss.
         """
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
-
+        unpad_inputs = False
         outputs = self.new(
             input_ids,
             attention_mask=attention_mask,
@@ -1347,7 +1349,7 @@ class NewForQuestionAnswering(NewPreTrainedModel):
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            unpad_inputs=
+            unpad_inputs=unpad_inputs,
         )
 
         sequence_output = outputs[0]
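Net effect of the commit: every task head above now passes unpad_inputs=unpad_inputs into self.new(...) with the flag locally set to False, so the backbone always runs the padded attention path regardless of config.unpad_inputs. A usage sketch via the standard transformers remote-code loader (the repo id is a placeholder, and it assumes the repository's auto_map exposes the sequence-classification class):

from transformers import AutoModelForSequenceClassification, AutoTokenizer

repo_id = "your-org/your-model"  # placeholder; substitute the repository that ships this modeling.py

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForSequenceClassification.from_pretrained(repo_id, trust_remote_code=True)

inputs = tokenizer(["a short example sentence"], return_tensors="pt", padding=True)
outputs = model(**inputs)            # runs the padded path; unpadding is disabled by this commit
print(outputs.logits.shape)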