mikesun112233 committed
Commit 77d1f89
1 Parent(s): 0c13b44
README.md ADDED

# Model name

## Model description

This model is a sequence-to-sequence question generator that takes an answer and a context as input and generates a question as output. It is based on a pretrained `t5-base` model.

## Intended uses & limitations

The model is trained to generate reading-comprehension-style questions with answers extracted from a text. The model performs best with full-sentence answers, but can also be used with single-word or short-phrase answers.

#### How to use

The model takes a concatenated answer and context as its input sequence and generates a full question sentence as its output sequence. The maximum sequence length is 512 tokens. Inputs should be organised into the following format:

```
<answer> answer text here <context> context text here
```

The input sequence can then be encoded and passed as the `input_ids` argument to the model's `generate()` method.
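
For illustration, here is a minimal usage sketch with the `transformers` library. The checkpoint id, the example texts, and the generation settings are assumptions for the sketch, not values taken from this repository:

```python
# Minimal usage sketch; "user/model-name" is a placeholder for this
# repository's actual model id.
from transformers import AutoTokenizer, T5ForConditionalGeneration

checkpoint = "user/model-name"  # hypothetical placeholder
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = T5ForConditionalGeneration.from_pretrained(checkpoint)

answer = "Paris"
context = "Paris is the capital and most populous city of France."

# Build the input in the documented <answer> ... <context> ... format.
input_text = f"<answer> {answer} <context> {context}"
inputs = tokenizer(input_text, max_length=512, truncation=True, return_tensors="pt")

# Generation settings here are illustrative, not the author's.
output_ids = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_length=64,
    num_beams=4,
    early_stopping=True,
)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```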

For best results, generate a large number of candidate questions and then filter them with [iarfmoose/bert-base-cased-qa-evaluator](https://huggingface.co/iarfmoose/bert-base-cased-qa-evaluator).

For examples, please see https://github.com/iarfmoose/question_generator.

#### Limitations and bias

The model is limited to generating questions in the same style as those found in [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/), [CoQA](https://stanfordnlp.github.io/coqa/), and [MSMARCO](https://microsoft.github.io/msmarco/). The generated questions can potentially be leading or reflect biases present in the context. If the context is too short or completely absent, or if the context and answer do not match, the generated question is likely to be incoherent.

## Training data

The model was fine-tuned on a dataset made up of several well-known QA datasets ([SQuAD](https://rajpurkar.github.io/SQuAD-explorer/), [CoQA](https://stanfordnlp.github.io/coqa/), and [MSMARCO](https://microsoft.github.io/msmarco/)). The datasets were restructured by concatenating the answer and context fields into the format described above, and the question field was used as the training target. The full training set was roughly 200,000 examples.
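
As a sketch, that restructuring might look like the following; the field names assume SQuAD-style records, and the actual preprocessing script is not part of this repository:

```python
# Hypothetical preprocessing sketch assuming SQuAD-style records with
# "question", "context", and "answers" fields; the real script used to
# build the training set is not included in this repository.
def make_example(record):
    answer = record["answers"]["text"][0]
    source = f"<answer> {answer} <context> {record['context']}"
    target = record["question"]
    return {"source": source, "target": target}
```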

## Training procedure

The model was trained for 20 epochs over the training set with a learning rate of 1e-3. The batch size was only 4 due to GPU memory limitations when training on Google Colab.
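
A minimal sketch of a fine-tuning loop matching those hyperparameters is shown below; the plain Adam optimizer and the shape of `train_dataset` are assumptions, as the original training code is not included here:

```python
# Fine-tuning sketch matching the reported hyperparameters
# (20 epochs, learning rate 1e-3, batch size 4); the optimizer choice
# and dataset format are assumptions, not taken from this repository.
import torch
from torch.utils.data import DataLoader

def fine_tune(model, train_dataset, epochs=20, lr=1e-3, batch_size=4):
    # train_dataset is assumed to yield dicts of input_ids,
    # attention_mask, and labels tensors.
    loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    model.train()
    for _ in range(epochs):
        for batch in loader:
            loss = model(
                input_ids=batch["input_ids"],
                attention_mask=batch["attention_mask"],
                labels=batch["labels"],  # label positions set to -100 are ignored
            ).loss
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
```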

added_tokens.json ADDED

{"<answer>": 32100, "<context>": 32101}

config.json ADDED

{
  "architectures": [
    "T5ForConditionalGeneration"
  ],
  "d_ff": 3072,
  "d_kv": 64,
  "d_model": 768,
  "decoder_start_token_id": 0,
  "dropout_rate": 0.1,
  "eos_token_id": 1,
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "layer_norm_epsilon": 1e-06,
  "model_type": "t5",
  "n_positions": 512,
  "num_heads": 12,
  "num_layers": 12,
  "output_past": true,
  "pad_token_id": 0,
  "relative_attention_num_buckets": 32,
  "task_specific_params": {
    "summarization": {
      "early_stopping": true,
      "length_penalty": 2.0,
      "max_length": 200,
      "min_length": 30,
      "no_repeat_ngram_size": 3,
      "num_beams": 4,
      "prefix": "summarize: "
    },
    "translation_en_to_de": {
      "early_stopping": true,
      "max_length": 300,
      "num_beams": 4,
      "prefix": "translate English to German: "
    },
    "translation_en_to_fr": {
      "early_stopping": true,
      "max_length": 300,
      "num_beams": 4,
      "prefix": "translate English to French: "
    },
    "translation_en_to_ro": {
      "early_stopping": true,
      "max_length": 300,
      "num_beams": 4,
      "prefix": "translate English to Romanian: "
    }
  },
  "vocab_size": 32102
}

flax_model.msgpack ADDED

version https://git-lfs.github.com/spec/v1
oid sha256:9421122c978cae24dbbb43a97c30a2a890ddbd8c14e960ac0d6058b18b759c2f
size 891545476

pytorch_model.bin ADDED

version https://git-lfs.github.com/spec/v1
oid sha256:c8c0b7392e5f8c408d67cac8414837e899a09fc6903595e1d62aa6fc810d3480
size 891612736

special_tokens_map.json ADDED

{"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "additional_special_tokens": ["<answer>", "<context>"]}

spiece.model ADDED

version https://git-lfs.github.com/spec/v1
oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
size 791656

tf_model.h5 ADDED

version https://git-lfs.github.com/spec/v1
oid sha256:1a42722f74b26b9f7b718c6b2922cab46bcfc1d59bb4af4cdbcd4f09eec2b1d0
size 892068080

tokenizer_config.json ADDED

{"model_max_length": 512}