qilowoq committed
Commit
9c798de
1 parent: 5b62b28

Upload model

Files changed (3)
  1. AbLang_roberta_model.py +41 -0
  2. config.json +5 -3
  3. pytorch_model.bin +2 -2
AbLang_roberta_model.py ADDED
@@ -0,0 +1,41 @@
+from transformers.models.roberta.modeling_roberta import RobertaEmbeddings, RobertaModel, RobertaForMaskedLM
+from typing import Optional
+import torch
+
+class RobertaEmbeddingsV2(RobertaEmbeddings):
+    def __init__(self, config):
+        super().__init__(config)
+        self.pad_token_id = config.pad_token_id
+        self.position_embeddings = torch.nn.Embedding(config.max_position_embeddings, config.hidden_size, padding_idx=0)  # here padding_idx is always 0
+
+    def forward(
+        self,
+        input_ids: torch.LongTensor,
+        token_type_ids: Optional[torch.LongTensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        inputs_embeds: Optional[torch.FloatTensor] = None,
+        past_key_values_length: int = 0,
+    ) -> torch.Tensor:
+        inputs_embeds = self.word_embeddings(input_ids)
+        position_ids = self.create_position_ids_from_input_ids(input_ids)
+        position_embeddings = self.position_embeddings(position_ids)
+        embeddings = inputs_embeds + position_embeddings
+        return self.dropout(self.LayerNorm(embeddings))
+
+    def create_position_ids_from_input_ids(self, input_ids: torch.LongTensor) -> torch.Tensor:
+        mask = input_ids.ne(self.pad_token_id).int()
+        return torch.cumsum(mask, dim=1).long() * mask
+
+
+class RobertaModelV2(RobertaModel):
+    def __init__(self, config, add_pooling_layer=False):
+        super().__init__(config, add_pooling_layer=add_pooling_layer)
+        self.embeddings = RobertaEmbeddingsV2(config)
+
+
+class RobertaForMaskedLMV2(RobertaForMaskedLM):
+    def __init__(self, config):
+        super().__init__(config)
+        self.roberta = RobertaModelV2(config, add_pooling_layer=False)
+
+
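The new embedding class swaps RoBERTa's offset-based position ids for AbLang's padding-aware scheme: real tokens are numbered from 1 by a cumulative sum over the non-padding mask, and every padding token is forced to position 0, which is also the `padding_idx` of the position embedding table. A minimal sketch of that arithmetic in isolation (the token values below are invented for illustration; `pad_token_id` is 21 as in config.json):

```python
import torch

pad_token_id = 21  # matches pad_token_id in config.json
input_ids = torch.tensor([[5, 9, 3, pad_token_id, pad_token_id]])  # hypothetical padded sequence

# Same logic as RobertaEmbeddingsV2.create_position_ids_from_input_ids:
mask = input_ids.ne(pad_token_id).int()                # [[1, 1, 1, 0, 0]]
position_ids = torch.cumsum(mask, dim=1).long() * mask

print(position_ids)  # tensor([[1, 2, 3, 0, 0]]) -- padding collapses onto padding_idx 0
```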
config.json CHANGED
@@ -1,13 +1,15 @@
 {
   "add_pooling_layer": false,
   "architectures": [
-    "BertModelV2"
+    "RobertaModelV2"
   ],
   "attention_probs_dropout_prob": 0.1,
   "auto_map": {
-    "AutoModel": "AbLang_bert_model.BertModelV2"
+    "AutoModel": "AbLang_roberta_model.RobertaModelV2"
   },
+  "bos_token_id": 0,
   "classifier_dropout": null,
+  "eos_token_id": 2,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
@@ -15,7 +17,7 @@
   "intermediate_size": 3072,
   "layer_norm_eps": 1e-12,
   "max_position_embeddings": 160,
-  "model_type": "bert",
+  "model_type": "roberta",
   "num_attention_heads": 12,
   "num_hidden_layers": 12,
   "pad_token_id": 21,
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f1df129a33dc579997effd90fb6ff33b8bcd2f2619fc2726cc524e02f71f4f3e
-size 343223341
+oid sha256:567cab815c99edb6aaa2063648779b8157cedeecd3a2618aeef3bbb7c0d4d848
+size 340860389
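Because `auto_map` in config.json now points at `AbLang_roberta_model.RobertaModelV2`, loading this checkpoint requires `trust_remote_code=True` so that transformers can import the custom class shipped in the repository. A minimal loading sketch (the repo id below is a placeholder, not taken from this commit; substitute the model's actual Hub path):

```python
from transformers import AutoModel

repo_id = "qilowoq/AbLang"  # placeholder repo id; use this model's actual Hub path

model = AutoModel.from_pretrained(
    repo_id,
    trust_remote_code=True,  # lets auto_map resolve AbLang_roberta_model.RobertaModelV2
)
```

Without `trust_remote_code=True`, transformers would fall back to building a stock model from `"model_type": "roberta"` and skip the padding-aware embeddings defined above.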