shahrukhx01 committed on
Commit
7e981bc
1 Parent(s): 6f081cf
config.json ADDED
@@ -0,0 +1,30 @@
+{
+  "_name_or_path": "deepset/roberta-base-squad2",
+  "architectures": [
+    "RobertaForQuestionAnswering"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "language": "english",
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "name": "Roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "transformers_version": "4.10.3",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 50265
+}
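The added config.json pins a RoBERTa-base encoder (12 layers, 12 attention heads, hidden size 768, vocabulary 50265) initialized from deepset/roberta-base-squad2. A minimal sketch of inspecting it with transformers' RobertaConfig; the local file path "config.json" is an assumption about where this repository is checked out, not part of the commit:

from transformers import RobertaConfig

# Read the committed config from a local copy of this repository (path assumed).
config = RobertaConfig.from_json_file("config.json")
print(config.num_hidden_layers, config.num_attention_heads, config.hidden_size)  # 12 12 768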
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
multitask_model.py ADDED
@@ -0,0 +1,216 @@
+"""
+Implementation borrowed from transformers package and extended to support multiple prediction heads:
+
+https://github.com/huggingface/transformers/blob/master/src/transformers/models/bert/modeling_bert.py
+"""
+
+import math
+
+import torch
+import torch.utils.checkpoint
+from packaging import version
+from torch import nn
+from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
+
+from transformers import PretrainedConfig
+from transformers.activations import ACT2FN, gelu
+from transformers.file_utils import (
+    add_code_sample_docstrings,
+    add_start_docstrings,
+    add_start_docstrings_to_model_forward,
+    replace_return_docstrings,
+)
+from transformers.modeling_outputs import (
+    BaseModelOutputWithPastAndCrossAttentions,
+    BaseModelOutputWithPoolingAndCrossAttentions,
+    CausalLMOutputWithCrossAttentions,
+    MaskedLMOutput,
+    MultipleChoiceModelOutput,
+    QuestionAnsweringModelOutput,
+    SequenceClassifierOutput,
+    TokenClassifierOutput,
+)
+from transformers.modeling_utils import (
+    PreTrainedModel,
+    apply_chunking_to_forward,
+    find_pruneable_heads_and_indices,
+    prune_linear_layer,
+)
+from transformers.utils import logging
+
+# from transformers.models.roberta.configuration_roberta import RobertaConfig
+
+
+logger = logging.get_logger(__name__)
+
+_CHECKPOINT_FOR_DOC = "roberta-base"
+_CONFIG_FOR_DOC = "RobertaConfig"
+_TOKENIZER_FOR_DOC = "RobertaTokenizer"
+
+from transformers.models.roberta.modeling_roberta import (
+    RobertaPreTrainedModel,
+    RobertaClassificationHead,
+    RobertaModel,
+)
+
+
+class RobertaClassificationHead(nn.Module):
+    """Head for sentence-level classification tasks."""
+
+    def __init__(self, config, num_labels):
+        super().__init__()
+        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
+        classifier_dropout = (
+            config.classifier_dropout
+            if config.classifier_dropout is not None
+            else config.hidden_dropout_prob
+        )
+        self.dropout = nn.Dropout(classifier_dropout)
+        self.out_proj = nn.Linear(config.hidden_size, num_labels)
+
+    def forward(self, features, **kwargs):
+        x = features[:, 0, :]  # take <s> token (equiv. to [CLS])
+        x = self.dropout(x)
+        x = self.dense(x)
+        x = torch.tanh(x)
+        x = self.dropout(x)
+        x = self.out_proj(x)
+        return x
+
+
+class RobertaForMultitaskQA(RobertaPreTrainedModel):
+    _keys_to_ignore_on_load_missing = [r"position_ids"]
+
+    def __init__(self, config, **kwargs):
+        super().__init__(PretrainedConfig())
+        self.num_labels = kwargs.get("task_labels_map", {})
+        self.config = config
+
+        self.roberta = RobertaModel(config, add_pooling_layer=False)
+        ## for squad2 QA task
+        self.qa_outputs = nn.Linear(
+            config.hidden_size, list(self.num_labels.values())[0]
+        )
+        ## for boolq
+        self.classifier = RobertaClassificationHead(
+            config, num_labels=list(self.num_labels.values())[1]
+        )
+
+        self.init_weights()
+
+    def forward(
+        self,
+        input_ids=None,
+        attention_mask=None,
+        token_type_ids=None,
+        position_ids=None,
+        head_mask=None,
+        inputs_embeds=None,
+        labels=None,
+        start_positions=None,
+        end_positions=None,
+        output_attentions=None,
+        output_hidden_states=None,
+        return_dict=None,
+        task_name=None,
+    ):
+        r"""
+        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
+            Labels for computing the sequence classification/regression loss. Indices should be in :obj:`[0, ...,
+            config.num_labels - 1]`. If :obj:`config.num_labels == 1` a regression loss is computed (Mean-Square loss),
+            If :obj:`config.num_labels > 1` a classification loss is computed (Cross-Entropy).
+        """
+        return_dict = (
+            return_dict if return_dict is not None else self.config.use_return_dict
+        )
+
+        outputs = self.roberta(
+            input_ids,
+            attention_mask=attention_mask,
+            token_type_ids=token_type_ids,
+            position_ids=position_ids,
+            head_mask=head_mask,
+            inputs_embeds=inputs_embeds,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=return_dict,
+        )
+        sequence_output = outputs[0]
+
+        logits = self.qa_outputs(sequence_output)
+        start_logits, end_logits = logits.split(1, dim=-1)
+        start_logits = start_logits.squeeze(-1).contiguous()
+        end_logits = end_logits.squeeze(-1).contiguous()
+
+        total_loss = None
+        if start_positions is not None and end_positions is not None:
+            # If we are on multi-GPU, split add a dimension
+            if len(start_positions.size()) > 1:
+                start_positions = start_positions.squeeze(-1)
+            if len(end_positions.size()) > 1:
+                end_positions = end_positions.squeeze(-1)
+            # sometimes the start/end positions are outside our model inputs, we ignore these terms
+            ignored_index = start_logits.size(1)
+            start_positions = start_positions.clamp(0, ignored_index)
+            end_positions = end_positions.clamp(0, ignored_index)
+
+            loss_fct = CrossEntropyLoss(ignore_index=ignored_index)
+            start_loss = loss_fct(start_logits, start_positions)
+            end_loss = loss_fct(end_logits, end_positions)
+            total_loss = (start_loss + end_loss) / 2
+
+        if not return_dict:
+            output = (start_logits, end_logits) + outputs[2:]
+            return ((total_loss,) + output) if total_loss is not None else output
+
+        qa_result = QuestionAnsweringModelOutput(
+            loss=total_loss,
+            start_logits=start_logits,
+            end_logits=end_logits,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
+        )
+
+
+        loss = None
+        logits = self.classifier(sequence_output)
+
+        if labels is not None:
+            if self.config.problem_type is None:
+                if list(self.num_labels.values())[1] == 1:
+                    self.config.problem_type = "regression"
+                elif list(self.num_labels.values())[1] > 1 and (
+                    labels.dtype == torch.long or labels.dtype == torch.int
+                ):
+                    self.config.problem_type = "single_label_classification"
+                else:
+                    self.config.problem_type = "multi_label_classification"
+
+            if self.config.problem_type == "regression":
+                loss_fct = MSELoss()
+                if list(self.num_labels.values())[1] == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
+            elif self.config.problem_type == "single_label_classification":
+                loss_fct = CrossEntropyLoss()
+                loss = loss_fct(
+                    logits.view(-1, list(self.num_labels.values())[1]),
+                    labels.view(-1),
+                )
+            elif self.config.problem_type == "multi_label_classification":
+                loss_fct = BCEWithLogitsLoss()
+                loss = loss_fct(logits, labels)
+
+        if not return_dict:
+            output = (logits,) + outputs[2:]
+            return ((loss,) + output) if loss is not None else output
+
+        classifier_result = SequenceClassifierOutput(
+            loss=loss,
+            logits=logits,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
+        )
+
+        return qa_result, classifier_result
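The module above defines RobertaForMultitaskQA, which shares one RoBERTa encoder between a SQuAD2-style span-extraction head (qa_outputs) and a BoolQ classification head (classifier), and returns a (QuestionAnsweringModelOutput, SequenceClassifierOutput) pair when return_dict is enabled. A minimal instantiation sketch; the task_labels_map keys and values, the dummy inputs, and the local import path are assumptions for illustration, not taken from this commit:

import torch
from transformers import RobertaConfig, RobertaTokenizer

from multitask_model import RobertaForMultitaskQA  # the file added in this commit

# First entry sizes qa_outputs (2 = start/end logits), second sizes the BoolQ head
# (assumed values for illustration).
task_labels_map = {"squad_v2": 2, "boolq": 2}

config = RobertaConfig.from_pretrained("deepset/roberta-base-squad2")
model = RobertaForMultitaskQA(config, task_labels_map=task_labels_map)

tokenizer = RobertaTokenizer.from_pretrained("deepset/roberta-base-squad2")
inputs = tokenizer("Is the sky blue?", "The sky appears blue.", return_tensors="pt")

model.eval()
with torch.no_grad():
    qa_out, cls_out = model(**inputs)  # QA output and BoolQ classifier output

print(qa_out.start_logits.shape)  # (1, sequence_length)
print(cls_out.logits.shape)       # (1, 2)

To reuse the weights committed here (pytorch_model.bin), the same class can presumably be pointed at a local checkout via from_pretrained with the same task_labels_map keyword, though the author's exact loading code is not shown in this commit.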
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9773e9c966c632820ced6567ba3ce07dd02a5afc997c6614884b82642de244b
+size 498689179
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+{"bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+{"unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "errors": "replace", "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "do_lower_case": false, "model_max_length": 512, "full_tokenizer_file": null, "special_tokens_map_file": "/root/.cache/huggingface/transformers/c9d2c178fac8d40234baa1833a3b1903d393729bf93ea34da247c07db24900d0.cb2244924ab24d706b02fd7fcedaea4531566537687a539ebb94db511fd122a0", "name_or_path": "deepset/roberta-base-squad2", "tokenizer_class": "RobertaTokenizer"}
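The tokenizer files added in this commit (vocab.json, merges.txt, tokenizer.json, special_tokens_map.json, and the tokenizer_config.json above) describe a standard RoBERTa byte-level BPE tokenizer taken from deepset/roberta-base-squad2, with model_max_length 512. A minimal loading sketch; the local directory path "./" is an assumption about where this repository is checked out:

from transformers import RobertaTokenizer

# Load the committed tokenizer files from a local copy of this repository (path assumed).
tokenizer = RobertaTokenizer.from_pretrained("./")

enc = tokenizer("Is water wet?", "Water makes things wet.", truncation=True, max_length=512)
print(tokenizer.convert_ids_to_tokens(enc["input_ids"])[:6])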
vocab.json ADDED
The diff for this file is too large to render. See raw diff