Upload model
Browse files- config.json +57 -0
- model.safetensors +3 -0
- modeling_CustomLEDForResultsId.py +44 -0
config.json
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "allenai/led-base-16384",
|
3 |
+
"activation_dropout": 0.0,
|
4 |
+
"activation_function": "gelu",
|
5 |
+
"architectures": [
|
6 |
+
"CustomLEDForResultsIdModel"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.0,
|
9 |
+
"attention_window": [
|
10 |
+
1024,
|
11 |
+
1024,
|
12 |
+
1024,
|
13 |
+
1024,
|
14 |
+
1024,
|
15 |
+
1024
|
16 |
+
],
|
17 |
+
"auto_map": {
|
18 |
+
"AutoModel": "modeling_CustomLEDForResultsId.CustomLEDForResultsIdModel"
|
19 |
+
},
|
20 |
+
"bos_token_id": 0,
|
21 |
+
"classif_dropout": 0.0,
|
22 |
+
"classifier_dropout": 0.0,
|
23 |
+
"d_model": 768,
|
24 |
+
"decoder_attention_heads": 12,
|
25 |
+
"decoder_ffn_dim": 3072,
|
26 |
+
"decoder_layerdrop": 0.0,
|
27 |
+
"decoder_layers": 6,
|
28 |
+
"decoder_start_token_id": 2,
|
29 |
+
"dropout": 0.1,
|
30 |
+
"encoder_attention_heads": 12,
|
31 |
+
"encoder_ffn_dim": 3072,
|
32 |
+
"encoder_layerdrop": 0.0,
|
33 |
+
"encoder_layers": 6,
|
34 |
+
"eos_token_id": 2,
|
35 |
+
"gradient_checkpointing": false,
|
36 |
+
"id2label": {
|
37 |
+
"0": "LABEL_0",
|
38 |
+
"1": "LABEL_1",
|
39 |
+
"2": "LABEL_2"
|
40 |
+
},
|
41 |
+
"init_std": 0.02,
|
42 |
+
"is_encoder_decoder": true,
|
43 |
+
"label2id": {
|
44 |
+
"LABEL_0": 0,
|
45 |
+
"LABEL_1": 1,
|
46 |
+
"LABEL_2": 2
|
47 |
+
},
|
48 |
+
"max_decoder_position_embeddings": 1024,
|
49 |
+
"max_encoder_position_embeddings": 16384,
|
50 |
+
"model_type": "led",
|
51 |
+
"num_hidden_layers": 6,
|
52 |
+
"pad_token_id": 1,
|
53 |
+
"torch_dtype": "float32",
|
54 |
+
"transformers_version": "4.35.2",
|
55 |
+
"use_cache": true,
|
56 |
+
"vocab_size": 50265
|
57 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d7d1adfe6f64aa4a7d99d95064349361812c3ececce79dbb12a8e4a005459b0f
|
3 |
+
size 417408708
|
modeling_CustomLEDForResultsId.py
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# from transformers.models.led.modeling_led import LEDEncoder
|
2 |
+
from transformers import LEDConfig, LEDModel, LEDPreTrainedModel
|
3 |
+
from transformers.modeling_outputs import TokenClassifierOutput
|
4 |
+
|
5 |
+
import torch.nn as nn
|
6 |
+
|
7 |
+
class CustomLEDForResultsIdModel(LEDPreTrainedModel):
    """LED-encoder token classifier.

    Uses only the *encoder* of an LED model as the feature extractor and
    adds dropout plus a linear head that produces ``num_labels`` logits
    per input token.
    """

    def __init__(self, config: LEDConfig, checkpoint=None):
        """
        Args:
            config: LED configuration; ``config.num_labels`` sets the
                classifier output width and ``config.dropout`` the head
                dropout probability.
            checkpoint: Optional model name or path. When given, the LED
                encoder weights are loaded from it; otherwise the encoder
                is randomly initialized from ``config``. Defaults to
                ``None`` so the class can be instantiated through the
                ``auto_map``/``AutoModel.from_pretrained`` path, which
                passes only ``config``.
        """
        super().__init__(config)
        self.num_labels = config.num_labels

        # Load the full LED model (optionally from a checkpoint) and keep
        # only its encoder as the body of this classifier.
        if checkpoint:
            self.led = LEDModel.from_pretrained(checkpoint, config=config).get_encoder()
        else:
            self.led = LEDModel(config).get_encoder()

        self.dropout = nn.Dropout(config.dropout)
        # Per-token classification head over the encoder hidden size.
        self.classifier = nn.Linear(self.led.config.d_model, self.num_labels)

    def forward(self, input_ids=None, attention_mask=None, labels=None,
                global_attention_mask=None, return_loss=True):
        """Run the encoder and classify every token.

        Args:
            input_ids: Token ids for the encoder.
            attention_mask: Standard (local) attention mask.
            labels: Optional per-token label ids; when given, a
                cross-entropy loss is computed over all positions.
            global_attention_mask: LED global-attention mask.
            return_loss: Accepted for API compatibility but currently
                unused — the loss is returned whenever ``labels`` is given.

        Returns:
            dict with keys ``'loss'`` (``None`` when no labels) and
            ``'logits'`` of shape (batch, seq_len, num_labels).
        """
        # Encoder forward pass; we only need the last hidden states.
        outputs = self.led(
            input_ids=input_ids,
            attention_mask=attention_mask,
            global_attention_mask=global_attention_mask,
        )

        sequence_output = self.dropout(outputs.last_hidden_state)
        logits = self.classifier(sequence_output)

        loss = None
        if labels is not None:
            # Flatten (batch, seq) into one axis so every token position
            # contributes to the cross-entropy loss.
            loss_fct = nn.CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

        return {
            'loss': loss,
            'logits': logits
        }