add model

Changed files:
- config.json (+4 -4)
- configuration_distilbert_ane.py (+2 -4)
- modeling_distilbert_ane.py (+17 -17)
config.json CHANGED

@@ -1,12 +1,12 @@
 {
   "activation": "gelu",
   "architectures": [
-    "DistilBertForSequenceClassification_ANE"
+    "DistilBertForSequenceClassification"
   ],
   "attention_dropout": 0.1,
   "auto_map": {
-    "AutoConfig": "configuration_distilbert_ane.DistilBertConfig_ANE",
-    "AutoModelForSequenceClassification": "modeling_distilbert_ane.DistilBertForSequenceClassification_ANE"
+    "AutoConfig": "configuration_distilbert_ane.DistilBertConfig",
+    "AutoModelForSequenceClassification": "modeling_distilbert_ane.DistilBertForSequenceClassification"
   },
   "dim": 768,
   "dropout": 0.1,
@@ -22,7 +22,7 @@
     "POSITIVE": 1
   },
   "max_position_embeddings": 512,
-  "model_type": "distilbert_ane",
+  "model_type": "distilbert",
   "n_heads": 12,
   "n_layers": 6,
   "output_past": true,
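The auto_map block above is what lets the stock Auto* loaders resolve to the custom classes shipped in this repo. A minimal loading sketch (the repo id below is a placeholder, not the actual repository name; trust_remote_code=True is required so that transformers will import the repo's configuration_distilbert_ane.py and modeling_distilbert_ane.py files):

from transformers import AutoModelForSequenceClassification, AutoTokenizer

repo_id = "user/distilbert-ane"  # placeholder repo id
tokenizer = AutoTokenizer.from_pretrained(repo_id)
# trust_remote_code=True lets transformers execute the custom code referenced by auto_map.
model = AutoModelForSequenceClassification.from_pretrained(repo_id,
                                                           trust_remote_code=True)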
configuration_distilbert_ane.py CHANGED

@@ -1,7 +1,5 @@
-from transformers import DistilBertConfig
-
-class DistilBertConfig_ANE(DistilBertConfig):
-    model_type = "distilbert_ane"
+from transformers.models.distilbert import configuration_distilbert
 
+class DistilBertConfig(configuration_distilbert.DistilBertConfig):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
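Dropping the explicit model_type means the subclass now inherits model_type = "distilbert" from the upstream DistilBertConfig, consistent with the "model_type": "distilbert" change in config.json above. A quick check, assuming the file is importable as a top-level module named configuration_distilbert_ane:

from configuration_distilbert_ane import DistilBertConfig

cfg = DistilBertConfig()
print(cfg.model_type)  # "distilbert", inherited from the upstream config class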
modeling_distilbert_ane.py CHANGED

@@ -43,7 +43,7 @@ import torch
 import torch.nn as nn
 
 from transformers.models.distilbert import modeling_distilbert
-from .configuration_distilbert_ane import DistilBertConfig_ANE
+from .configuration_distilbert_ane import DistilBertConfig
 
 # Note: Original implementation of distilbert uses an epsilon value of 1e-12
 # which is not friendly with the float16 precision that ANE uses by default
@@ -317,8 +317,8 @@ class Transformer(modeling_distilbert.Transformer):
         [TransformerBlock(config) for _ in range(config.n_layers)]))
 
 
-class DistilBertModel_ANE(modeling_distilbert.DistilBertModel):
-    config_class = DistilBertConfig_ANE
+class DistilBertModel(modeling_distilbert.DistilBertModel):
+    config_class = DistilBertConfig
 
     def __init__(self, config):
         super().__init__(config)
@@ -332,14 +332,14 @@ class DistilBertModel_ANE(modeling_distilbert.DistilBertModel):
         raise NotImplementedError
 
 
-class DistilBertForMaskedLM_ANE(modeling_distilbert.DistilBertForMaskedLM):
-    config_class = DistilBertConfig_ANE
+class DistilBertForMaskedLM(modeling_distilbert.DistilBertForMaskedLM):
+    config_class = DistilBertConfig
 
     def __init__(self, config):
         super().__init__(config)
         from transformers.activations import get_activation
         setattr(self, 'activation', get_activation(config.activation))
-        setattr(self, 'distilbert', DistilBertModel_ANE(config))
+        setattr(self, 'distilbert', DistilBertModel(config))
         setattr(self, 'vocab_transform', nn.Conv2d(config.dim, config.dim, 1))
         setattr(self, 'vocab_layer_norm', LayerNormANE(config.dim, eps=EPS))
         setattr(self, 'vocab_projector',
@@ -390,13 +390,13 @@ class DistilBertForMaskedLM_ANE(modeling_distilbert.DistilBertForMaskedLM):
         return ((mlm_loss, ) + output) if mlm_loss is not None else output
 
 
-class DistilBertForSequenceClassification_ANE(
+class DistilBertForSequenceClassification(
         modeling_distilbert.DistilBertForSequenceClassification):
-    config_class = DistilBertConfig_ANE
+    config_class = DistilBertConfig
 
     def __init__(self, config):
         super().__init__(config)
-        setattr(self, 'distilbert', DistilBertModel_ANE(config))
+        setattr(self, 'distilbert', DistilBertModel(config))
         setattr(self, 'pre_classifier', nn.Conv2d(config.dim, config.dim, 1))
         setattr(self, 'classifier', nn.Conv2d(config.dim, config.num_labels,
                                               1))
@@ -441,13 +441,13 @@ class DistilBertForSequenceClassification_ANE(
         return ((loss, ) + output) if loss is not None else output
 
 
-class DistilBertForQuestionAnswering_ANE(
+class DistilBertForQuestionAnswering(
         modeling_distilbert.DistilBertForQuestionAnswering):
-    config_class = DistilBertConfig_ANE
+    config_class = DistilBertConfig
 
     def __init__(self, config):
         super().__init__(config)
-        setattr(self, 'distilbert', DistilBertModel_ANE(config))
+        setattr(self, 'distilbert', DistilBertModel(config))
         setattr(self, 'qa_outputs', nn.Conv2d(config.dim, config.num_labels,
                                               1))
 
@@ -497,12 +497,12 @@ class DistilBertForQuestionAnswering_ANE(
         return ((total_loss, ) + output) if total_loss is not None else output
 
 
-class DistilBertForTokenClassification_ANE(
+class DistilBertForTokenClassification(
         modeling_distilbert.DistilBertForTokenClassification):
 
     def __init__(self, config):
         super().__init__(config)
-        setattr(self, 'distilbert', DistilBertModel_ANE(config))
+        setattr(self, 'distilbert', DistilBertModel(config))
         setattr(self, 'classifier',
                 nn.Conv2d(config.hidden_size, config.num_labels, 1))
 
@@ -544,13 +544,13 @@ class DistilBertForTokenClassification_ANE(
         return ((loss, ) + output) if loss is not None else output
 
 
-class DistilBertForMultipleChoice_ANE(
+class DistilBertForMultipleChoice(
         modeling_distilbert.DistilBertForMultipleChoice):
-    config_class = DistilBertConfig_ANE
+    config_class = DistilBertConfig
 
     def __init__(self, config):
         super().__init__(config)
-        setattr(self, 'distilbert', DistilBertModel_ANE(config))
+        setattr(self, 'distilbert', DistilBertModel(config))
         setattr(self, 'pre_classifier', nn.Conv2d(config.dim, config.dim, 1))
         setattr(self, 'classifier', nn.Conv2d(config.dim, 1, 1))
 
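On the epsilon comment near the top of this file's diff: float16 cannot represent 1e-12 (its smallest positive subnormal is roughly 6e-8), so the stock LayerNorm epsilon silently underflows to exactly zero in half precision and no longer guards the division. A quick demonstration:

import torch

# 1e-12 is below float16's smallest subnormal (~5.96e-8) and rounds to zero.
print(torch.tensor(1e-12, dtype=torch.float16))  # tensor(0., dtype=torch.float16)
# A larger epsilon survives the cast to half precision.
print(torch.tensor(1e-7, dtype=torch.float16))   # tensor(1.1921e-07, dtype=torch.float16)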
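The nn.Conv2d(..., 1) layers standing in for nn.Linear follow the ANE-friendly (batch, channels, 1, seq) data layout used by Apple's ml-ane-transformers reference code, in which a 1x1 convolution computes the same affine map as a linear layer. A sketch of the equivalence (shapes are illustrative):

import torch
import torch.nn as nn

dim, seq = 768, 16
linear = nn.Linear(dim, dim)
conv = nn.Conv2d(dim, dim, 1)
# Give both layers identical weights; Conv2d stores them as (out, in, 1, 1).
with torch.no_grad():
    conv.weight.copy_(linear.weight.view(dim, dim, 1, 1))
    conv.bias.copy_(linear.bias)

x = torch.randn(1, seq, dim)             # (B, S, C) layout expected by nn.Linear
x_bc1s = x.transpose(1, 2).unsqueeze(2)  # (B, C, 1, S) layout used in this file

out_linear = linear(x)
out_conv = conv(x_bc1s).squeeze(2).transpose(1, 2)  # back to (B, S, C)
print(torch.allclose(out_linear, out_conv, atol=1e-5))  # True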