biggunnyso4 committed
Commit 3fc3c42
1 Parent(s): ac5cfbf

Update modeling.py

Files changed (1)
  1. modeling.py +13 -11
modeling.py CHANGED
@@ -355,6 +355,7 @@ class NewEmbeddings(nn.Module):
                     attention_mask[i, l:] = 0
 
         # Set attention_mask_bool for unpadding
+        unpad_inputs = False
         if unpad_inputs:
             attention_mask_bool = attention_mask.bool()
             if length is None:
@@ -861,7 +862,8 @@ class NewModel(NewPreTrainedModel):
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
         )
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
-        unpad_inputs = unpad_inputs if unpad_inputs is not None else self.config.unpad_inputs
+        # unpad_inputs = unpad_inputs if unpad_inputs is not None else self.config.unpad_inputs
+        unpad_inputs = False
         output_padded = length is None
 
         if input_ids is not None and inputs_embeds is not None:
@@ -1006,7 +1008,7 @@ class NewForMaskedLM(NewPreTrainedModel):
         """
 
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
-
+        unpad_inputs = False
         if labels is None or not self.new.config.unpad_inputs:
             length = None
             subset_indices = None
@@ -1027,7 +1029,7 @@ class NewForMaskedLM(NewPreTrainedModel):
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            unpad_inputs=False,
+            unpad_inputs=unpad_inputs,
         )
 
         sequence_output = outputs[0]
@@ -1092,7 +1094,7 @@ class NewForSequenceClassification(NewPreTrainedModel):
             `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
         """
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
-
+        unpad_inputs = False
         outputs = self.new(
             input_ids,
             attention_mask=attention_mask,
@@ -1103,7 +1105,7 @@ class NewForSequenceClassification(NewPreTrainedModel):
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            unpad_inputs=False,
+            unpad_inputs=unpad_inputs,
         )
 
         pooled_output = outputs[1]
@@ -1182,7 +1184,7 @@ class NewForMultipleChoice(NewPreTrainedModel):
         """
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
         num_choices = input_ids.shape[1] if input_ids is not None else inputs_embeds.shape[1]
-
+        unpad_inputs = False
         input_ids = input_ids.view(-1, input_ids.size(-1)) if input_ids is not None else None
         attention_mask = attention_mask.view(-1, attention_mask.size(-1)) if attention_mask is not None else None
         token_type_ids = token_type_ids.view(-1, token_type_ids.size(-1)) if token_type_ids is not None else None
@@ -1203,7 +1205,7 @@ class NewForMultipleChoice(NewPreTrainedModel):
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            unpad_inputs=False,
+            unpad_inputs=unpad_inputs,
         )
 
         pooled_output = outputs[1]
@@ -1263,7 +1265,7 @@ class NewForTokenClassification(NewPreTrainedModel):
             Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
         """
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
-
+        unpad_inputs = False
         outputs = self.new(
             input_ids,
             attention_mask=attention_mask,
@@ -1274,7 +1276,7 @@ class NewForTokenClassification(NewPreTrainedModel):
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            unpad_inputs=False,
+            unpad_inputs=unpad_inputs,
         )
 
         sequence_output = outputs[0]
@@ -1336,7 +1338,7 @@ class NewForQuestionAnswering(NewPreTrainedModel):
             are not taken into account for computing the loss.
         """
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
-
+        unpad_inputs = False
        outputs = self.new(
             input_ids,
             attention_mask=attention_mask,
@@ -1347,7 +1349,7 @@ class NewForQuestionAnswering(NewPreTrainedModel):
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            unpad_inputs=False,
+            unpad_inputs=unpad_inputs,
         )
 
         sequence_output = outputs[0]
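
For context, a minimal caller-side sketch of what this change amounts to, assuming the checkpoint is loaded through transformers with trust_remote_code=True; the repository id below is a placeholder, not a name taken from this repo. With unpad_inputs hard-coded to False in NewEmbeddings and NewModel.forward, the model always takes the padded attention path, regardless of config.unpad_inputs or any value the caller passes.

# Hypothetical usage sketch; "org/new-model" is a placeholder repo id.
import torch
from transformers import AutoModel, AutoTokenizer

repo = "org/new-model"
tokenizer = AutoTokenizer.from_pretrained(repo, trust_remote_code=True)
model = AutoModel.from_pretrained(repo, trust_remote_code=True)

batch = tokenizer(
    ["a short sentence", "a noticeably longer sentence that forces padding"],
    padding=True,
    return_tensors="pt",
)

with torch.no_grad():
    # Before this commit the effective flag was
    #   unpad_inputs if unpad_inputs is not None else config.unpad_inputs,
    # so the unpadded (variable-length) path could be taken; after it,
    # forward() overrides the flag with False and keeps the padded path.
    outputs = model(**batch)

print(outputs.last_hidden_state.shape)  # (batch_size, padded_seq_len, hidden_size)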