Upload model
Browse files
- config.json +4 -1
- model.safetensors +1 -1
- modeling_CustomLEDForQA.py +13 -2
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"activation_dropout": 0.0,
|
4 |
"activation_function": "gelu",
|
5 |
"architectures": [
|
@@ -14,6 +14,9 @@
|
|
14 |
1024,
|
15 |
1024
|
16 |
],
|
|
|
|
|
|
|
17 |
"bos_token_id": 0,
|
18 |
"classif_dropout": 0.0,
|
19 |
"classifier_dropout": 0.0,
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "allenai/led-base-16384",
|
3 |
"activation_dropout": 0.0,
|
4 |
"activation_function": "gelu",
|
5 |
"architectures": [
|
|
|
14 |
1024,
|
15 |
1024
|
16 |
],
|
17 |
+
"auto_map": {
|
18 |
+
"AutoModel": "modeling_CustomLEDForQA.CustomLEDForQAModel"
|
19 |
+
},
|
20 |
"bos_token_id": 0,
|
21 |
"classif_dropout": 0.0,
|
22 |
"classifier_dropout": 0.0,
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 417405656
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7de0c365541e0d36d3fe80cb356e4da80915c6c57c12e9caff204628517f478a
|
3 |
size 417405656
|
modeling_CustomLEDForQA.py
CHANGED
@@ -29,12 +29,23 @@ class CustomLEDForQAModel(LEDPreTrainedModel):
|
|
29 |
end_logits = end_logits.squeeze(-1).contiguous()
|
30 |
|
31 |
total_loss = None
|
|
|
32 |
if start_positions is not None and end_positions is not None:
|
33 |
|
34 |
loss_fct = nn.CrossEntropyLoss()
|
35 |
-
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
total_loss = (start_loss + end_loss) / 2
|
|
|
|
|
38 |
|
39 |
|
40 |
return {
|
|
|
29 |
end_logits = end_logits.squeeze(-1).contiguous()
|
30 |
|
31 |
total_loss = None
|
32 |
+
|
33 |
if start_positions is not None and end_positions is not None:
|
34 |
|
35 |
loss_fct = nn.CrossEntropyLoss()
|
36 |
+
|
37 |
+
if len(start_positions.size()) > 1:
|
38 |
+
start_positions = start_positions.squeeze(-1)
|
39 |
+
if len(end_positions.size()) > 1:
|
40 |
+
end_positions = end_positions.squeeze(-1)
|
41 |
+
|
42 |
+
start_loss = loss_fct(start_logits, start_positions)
|
43 |
+
end_loss = loss_fct(end_logits, end_positions)
|
44 |
+
# start_loss = loss_fct(start_logits[index], start_positions[index][0])
|
45 |
+
# end_loss = loss_fct(end_logits[index], end_positions[index][0])
|
46 |
total_loss = (start_loss + end_loss) / 2
|
47 |
+
|
48 |
+
|
49 |
|
50 |
|
51 |
return {
|