Upload 7 files
Browse files
- README.md +12 -0
- config.json +123 -0
- metrics.json +27 -0
- model.th +0 -0
- vocabulary/non_padded_namespaces.txt +2 -0
- vocabulary/tokens.txt +0 -0
- weights.th +0 -0
README.md
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
language: en
|
3 |
+
tags:
|
4 |
+
- allennlp
|
5 |
+
- question-answering
|
6 |
+
---
|
7 |
+
|
8 |
+
This is an implementation of the BiDAF model with ELMo embeddings. The basic layout is simple: encode each word as a combination of word embeddings and a character-level encoder; pass the word representations through a bi-LSTM/GRU; use an attention matrix to inject question information into the passage word representations (this is the only non-standard part); pass the result through a few more layers of bi-LSTMs/GRUs; and apply a softmax over the span start and span end positions.
|
9 |
+
|
10 |
+
CAVEATS:
|
11 |
+
------
|
12 |
+
This model is based on ELMo. ELMo is not deterministic, so you will see slight differences in the output every time you run it. ELMo also benefits from being warmed up, so we recommend running some dummy input through the model before processing real workloads.
|
config.json
ADDED
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_reader": {
|
3 |
+
"type": "squad",
|
4 |
+
"token_indexers": {
|
5 |
+
"elmo": {
|
6 |
+
"type": "elmo_characters"
|
7 |
+
},
|
8 |
+
"token_characters": {
|
9 |
+
"type": "characters",
|
10 |
+
"character_tokenizer": {
|
11 |
+
"byte_encoding": "utf-8",
|
12 |
+
"end_tokens": [
|
13 |
+
260
|
14 |
+
],
|
15 |
+
"start_tokens": [
|
16 |
+
259
|
17 |
+
]
|
18 |
+
},
|
19 |
+
"min_padding_length": 5
|
20 |
+
},
|
21 |
+
"tokens": {
|
22 |
+
"type": "single_id",
|
23 |
+
"lowercase_tokens": true
|
24 |
+
}
|
25 |
+
}
|
26 |
+
},
|
27 |
+
"model": {
|
28 |
+
"type": "bidaf",
|
29 |
+
"dropout": 0.2,
|
30 |
+
"matrix_attention": {
|
31 |
+
"type": "linear",
|
32 |
+
"combination": "x,y,x*y",
|
33 |
+
"tensor_1_dim": 200,
|
34 |
+
"tensor_2_dim": 200
|
35 |
+
},
|
36 |
+
"modeling_layer": {
|
37 |
+
"type": "lstm",
|
38 |
+
"bidirectional": true,
|
39 |
+
"dropout": 0.2,
|
40 |
+
"hidden_size": 100,
|
41 |
+
"input_size": 800,
|
42 |
+
"num_layers": 2
|
43 |
+
},
|
44 |
+
"num_highway_layers": 2,
|
45 |
+
"phrase_layer": {
|
46 |
+
"type": "lstm",
|
47 |
+
"bidirectional": true,
|
48 |
+
"hidden_size": 100,
|
49 |
+
"input_size": 1224,
|
50 |
+
"num_layers": 1
|
51 |
+
},
|
52 |
+
"span_end_encoder": {
|
53 |
+
"type": "lstm",
|
54 |
+
"bidirectional": true,
|
55 |
+
"hidden_size": 100,
|
56 |
+
"input_size": 1400,
|
57 |
+
"num_layers": 1
|
58 |
+
},
|
59 |
+
"text_field_embedder": {
|
60 |
+
"token_embedders": {
|
61 |
+
"elmo": {
|
62 |
+
"type": "elmo_token_embedder",
|
63 |
+
"do_layer_norm": false,
|
64 |
+
"dropout": 0,
|
65 |
+
"options_file": "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json",
|
66 |
+
"weight_file": "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
|
67 |
+
},
|
68 |
+
"token_characters": {
|
69 |
+
"type": "character_encoding",
|
70 |
+
"dropout": 0.2,
|
71 |
+
"embedding": {
|
72 |
+
"embedding_dim": 16,
|
73 |
+
"num_embeddings": 262
|
74 |
+
},
|
75 |
+
"encoder": {
|
76 |
+
"type": "cnn",
|
77 |
+
"embedding_dim": 16,
|
78 |
+
"ngram_filter_sizes": [
|
79 |
+
5
|
80 |
+
],
|
81 |
+
"num_filters": 100
|
82 |
+
}
|
83 |
+
},
|
84 |
+
"tokens": {
|
85 |
+
"type": "embedding",
|
86 |
+
"embedding_dim": 100,
|
87 |
+
"pretrained_file": "https://allennlp.s3.amazonaws.com/datasets/glove/glove.6B.100d.txt.gz",
|
88 |
+
"trainable": false
|
89 |
+
}
|
90 |
+
}
|
91 |
+
}
|
92 |
+
},
|
93 |
+
"train_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-train-v1.1.json",
|
94 |
+
"validation_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-dev-v1.1.json",
|
95 |
+
"trainer": {
|
96 |
+
"callbacks": [
|
97 |
+
"tensorboard"
|
98 |
+
],
|
99 |
+
"grad_norm": 5,
|
100 |
+
"learning_rate_scheduler": {
|
101 |
+
"type": "reduce_on_plateau",
|
102 |
+
"factor": 0.5,
|
103 |
+
"mode": "max",
|
104 |
+
"patience": 2
|
105 |
+
},
|
106 |
+
"num_epochs": 20,
|
107 |
+
"optimizer": {
|
108 |
+
"type": "adam",
|
109 |
+
"betas": [
|
110 |
+
0.9,
|
111 |
+
0.9
|
112 |
+
]
|
113 |
+
},
|
114 |
+
"patience": 10,
|
115 |
+
"validation_metric": "+em"
|
116 |
+
},
|
117 |
+
"data_loader": {
|
118 |
+
"batch_sampler": {
|
119 |
+
"type": "bucket",
|
120 |
+
"batch_size": 40
|
121 |
+
}
|
122 |
+
}
|
123 |
+
}
|
metrics.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_epoch": 4,
|
3 |
+
"peak_worker_0_memory_MB": 39696.53125,
|
4 |
+
"peak_gpu_0_memory_MB": 7284.326171875,
|
5 |
+
"training_duration": "4:30:57.670185",
|
6 |
+
"epoch": 14,
|
7 |
+
"training_start_acc": 0.9643260767816985,
|
8 |
+
"training_end_acc": 0.9655932145344125,
|
9 |
+
"training_span_acc": 0.9391088939371454,
|
10 |
+
"training_em": 0.9497482847977716,
|
11 |
+
"training_f1": 0.9744178909985174,
|
12 |
+
"training_loss": 0.20648720288929873,
|
13 |
+
"training_worker_0_memory_MB": 39696.53125,
|
14 |
+
"training_gpu_0_memory_MB": 7283.4765625,
|
15 |
+
"validation_start_acc": 0.6269631031220435,
|
16 |
+
"validation_end_acc": 0.6750236518448439,
|
17 |
+
"validation_span_acc": 0.535666982024598,
|
18 |
+
"validation_em": 0.6783349101229896,
|
19 |
+
"validation_f1": 0.7866344427692674,
|
20 |
+
"validation_loss": 4.767929854482975,
|
21 |
+
"best_validation_start_acc": 0.6688741721854304,
|
22 |
+
"best_validation_end_acc": 0.7033112582781457,
|
23 |
+
"best_validation_span_acc": 0.5831598864711447,
|
24 |
+
"best_validation_em": 0.7221381267738883,
|
25 |
+
"best_validation_f1": 0.8105977611457257,
|
26 |
+
"best_validation_loss": 2.2097141022952096
|
27 |
+
}
|
model.th
ADDED
Binary file (41 Bytes). View file
|
|
vocabulary/non_padded_namespaces.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
*labels
|
2 |
+
*tags
|
vocabulary/tokens.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
weights.th
ADDED
Binary file (41 Bytes). View file
|
|