Matthijs committed · Commit aab0f05 · 1 Parent(s): 789f40d
config.json CHANGED
@@ -1,12 +1,12 @@
 {
   "activation": "gelu",
   "architectures": [
-    "DistilBertForSequenceClassification_ANE"
+    "DistilBertForSequenceClassification"
   ],
   "attention_dropout": 0.1,
   "auto_map": {
-    "AutoConfig": "configuration_distilbert_ane.DistilBertConfig_ANE",
-    "AutoModelForSequenceClassification": "modeling_distilbert_ane.DistilBertForSequenceClassification_ANE"
+    "AutoConfig": "configuration_distilbert_ane.DistilBertConfig",
+    "AutoModelForSequenceClassification": "modeling_distilbert_ane.DistilBertForSequenceClassification"
   },
   "dim": 768,
   "dropout": 0.1,
@@ -22,7 +22,7 @@
     "POSITIVE": 1
   },
   "max_position_embeddings": 512,
-  "model_type": "distilbert_ane",
+  "model_type": "distilbert",
   "n_heads": 12,
   "n_layers": 6,
   "output_past": true,
configuration_distilbert_ane.py CHANGED
@@ -1,7 +1,5 @@
-from transformers import DistilBertConfig
-
-class DistilBertConfig_ANE(DistilBertConfig):
-    model_type = "distilbert_ane"
+from transformers.models.distilbert import configuration_distilbert
 
+class DistilBertConfig(configuration_distilbert.DistilBertConfig):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
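
With the model_type override removed, the subclass inherits model_type = "distilbert" from the parent config, so a saved config round-trips as an ordinary DistilBERT config. A quick sanity check, assuming configuration_distilbert_ane.py is importable locally:

# Sketch: the subclass now reports the stock model type.
from configuration_distilbert_ane import DistilBertConfig

config = DistilBertConfig(dim=768, n_heads=12, n_layers=6)
assert config.model_type == "distilbert"  # was "distilbert_ane" before this commit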
modeling_distilbert_ane.py CHANGED
@@ -43,7 +43,7 @@ import torch
 import torch.nn as nn
 
 from transformers.models.distilbert import modeling_distilbert
-from .configuration_distilbert_ane import DistilBertConfig_ANE
+from .configuration_distilbert_ane import DistilBertConfig
 
 # Note: Original implementation of distilbert uses an epsilon value of 1e-12
 # which is not friendly with the float16 precision that ANE uses by default
@@ -317,8 +317,8 @@ class Transformer(modeling_distilbert.Transformer):
             [TransformerBlock(config) for _ in range(config.n_layers)]))
 
 
-class DistilBertModel_ANE(modeling_distilbert.DistilBertModel):
-    config_class = DistilBertConfig_ANE
+class DistilBertModel(modeling_distilbert.DistilBertModel):
+    config_class = DistilBertConfig
 
     def __init__(self, config):
         super().__init__(config)
@@ -332,14 +332,14 @@ class DistilBertModel_ANE(modeling_distilbert.DistilBertModel):
         raise NotImplementedError
 
 
-class DistilBertForMaskedLM_ANE(modeling_distilbert.DistilBertForMaskedLM):
-    config_class = DistilBertConfig_ANE
+class DistilBertForMaskedLM(modeling_distilbert.DistilBertForMaskedLM):
+    config_class = DistilBertConfig
 
     def __init__(self, config):
         super().__init__(config)
         from transformers.activations import get_activation
         setattr(self, 'activation', get_activation(config.activation))
-        setattr(self, 'distilbert', DistilBertModel_ANE(config))
+        setattr(self, 'distilbert', DistilBertModel(config))
         setattr(self, 'vocab_transform', nn.Conv2d(config.dim, config.dim, 1))
         setattr(self, 'vocab_layer_norm', LayerNormANE(config.dim, eps=EPS))
         setattr(self, 'vocab_projector',
@@ -390,13 +390,13 @@ class DistilBertForMaskedLM_ANE(modeling_distilbert.DistilBertForMaskedLM):
         return ((mlm_loss, ) + output) if mlm_loss is not None else output
 
 
-class DistilBertForSequenceClassification_ANE(
+class DistilBertForSequenceClassification(
         modeling_distilbert.DistilBertForSequenceClassification):
-    config_class = DistilBertConfig_ANE
+    config_class = DistilBertConfig
 
     def __init__(self, config):
         super().__init__(config)
-        setattr(self, 'distilbert', DistilBertModel_ANE(config))
+        setattr(self, 'distilbert', DistilBertModel(config))
         setattr(self, 'pre_classifier', nn.Conv2d(config.dim, config.dim, 1))
         setattr(self, 'classifier', nn.Conv2d(config.dim, config.num_labels,
                                               1))
@@ -441,13 +441,13 @@ class DistilBertForSequenceClassification_ANE(
         return ((loss, ) + output) if loss is not None else output
 
 
-class DistilBertForQuestionAnswering_ANE(
+class DistilBertForQuestionAnswering(
         modeling_distilbert.DistilBertForQuestionAnswering):
-    config_class = DistilBertConfig_ANE
+    config_class = DistilBertConfig
 
     def __init__(self, config):
         super().__init__(config)
-        setattr(self, 'distilbert', DistilBertModel_ANE(config))
+        setattr(self, 'distilbert', DistilBertModel(config))
        setattr(self, 'qa_outputs', nn.Conv2d(config.dim, config.num_labels,
                                              1))
 
@@ -497,12 +497,12 @@ class DistilBertForQuestionAnswering_ANE(
         return ((total_loss, ) + output) if total_loss is not None else output
 
 
-class DistilBertForTokenClassification_ANE(
+class DistilBertForTokenClassification(
         modeling_distilbert.DistilBertForTokenClassification):
 
     def __init__(self, config):
         super().__init__(config)
-        setattr(self, 'distilbert', DistilBertModel_ANE(config))
+        setattr(self, 'distilbert', DistilBertModel(config))
         setattr(self, 'classifier',
                 nn.Conv2d(config.hidden_size, config.num_labels, 1))
 
@@ -544,13 +544,13 @@ class DistilBertForTokenClassification_ANE(
         return ((loss, ) + output) if loss is not None else output
 
 
-class DistilBertForMultipleChoice_ANE(
+class DistilBertForMultipleChoice(
         modeling_distilbert.DistilBertForMultipleChoice):
-    config_class = DistilBertConfig_ANE
+    config_class = DistilBertConfig
 
     def __init__(self, config):
         super().__init__(config)
-        setattr(self, 'distilbert', DistilBertModel_ANE(config))
+        setattr(self, 'distilbert', DistilBertModel(config))
         setattr(self, 'pre_classifier', nn.Conv2d(config.dim, config.dim, 1))
         setattr(self, 'classifier', nn.Conv2d(config.dim, 1, 1))
 
556