update model
Browse files- .gitattributes +1 -0
- added_tokens.json +1 -0
- config.json +60 -0
- eval/metric.first.answer.paragraph_answer.question.asahi417_qg_jaquad.default.json +1 -0
- eval/metric.first.answer.paragraph_sentence.question.asahi417_qg_jaquad.default.json +1 -0
- eval/metric.first.answer.sentence_answer.question.asahi417_qg_jaquad.default.json +1 -0
- eval/metric.first.sentence.paragraph_answer.question.asahi417_qg_jaquad.default.json +1 -0
- eval/metric.first.sentence.paragraph_sentence.question.asahi417_qg_jaquad.default.json +1 -0
- eval/metric.first.sentence.sentence_answer.question.asahi417_qg_jaquad.default.json +1 -0
- eval/metric.last.sentence.paragraph_answer.question.asahi417_qg_jaquad.default.json +1 -0
- eval/metric.last.sentence.paragraph_sentence.question.asahi417_qg_jaquad.default.json +1 -0
- eval/metric.last.sentence.sentence_answer.question.asahi417_qg_jaquad.default.json +1 -0
- eval/metric.long.sentence.paragraph_answer.question.asahi417_qg_jaquad.default.json +1 -0
- eval/metric.long.sentence.paragraph_sentence.question.asahi417_qg_jaquad.default.json +1 -0
- eval/metric.long.sentence.sentence_answer.question.asahi417_qg_jaquad.default.json +1 -0
- eval/metric.middle.sentence.paragraph_answer.question.asahi417_qg_jaquad.default.json +1 -0
- eval/metric.middle.sentence.paragraph_sentence.question.asahi417_qg_jaquad.default.json +1 -0
- eval/metric.middle.sentence.sentence_answer.question.asahi417_qg_jaquad.default.json +1 -0
- eval/metric.short.sentence.paragraph_answer.question.asahi417_qg_jaquad.default.json +1 -0
- eval/metric.short.sentence.paragraph_sentence.question.asahi417_qg_jaquad.default.json +1 -0
- eval/metric.short.sentence.sentence_answer.question.asahi417_qg_jaquad.default.json +1 -0
- eval/samples.test.hyp.paragraph_answer.question.asahi417_qg_jaquad.default.txt +0 -0
- eval/samples.test.hyp.paragraph_sentence.question.asahi417_qg_jaquad.default.txt +0 -0
- eval/samples.test.hyp.sentence_answer.question.asahi417_qg_jaquad.default.txt +0 -0
- eval/samples.validation.hyp.paragraph_answer.question.asahi417_qg_jaquad.default.txt +0 -0
- eval/samples.validation.hyp.paragraph_sentence.question.asahi417_qg_jaquad.default.txt +0 -0
- eval/samples.validation.hyp.sentence_answer.question.asahi417_qg_jaquad.default.txt +0 -0
- pytorch_model.bin +3 -0
- sentencepiece.bpe.model +3 -0
- special_tokens_map.json +1 -0
- tokenizer.json +3 -0
- tokenizer_config.json +1 -0
- trainer_config.json +1 -0
.gitattributes
CHANGED
@@ -25,3 +25,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
25 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
26 |
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
27 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
25 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
26 |
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
27 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
added_tokens.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"<hl>": 250027}
|
config.json
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "lmqg_output/mbart_large_cc25_jaquad/model_xiswyu/epoch_10",
|
3 |
+
"_num_labels": 3,
|
4 |
+
"activation_dropout": 0.0,
|
5 |
+
"activation_function": "gelu",
|
6 |
+
"add_bias_logits": false,
|
7 |
+
"add_final_layer_norm": true,
|
8 |
+
"add_prefix": false,
|
9 |
+
"architectures": [
|
10 |
+
"MBartForConditionalGeneration"
|
11 |
+
],
|
12 |
+
"attention_dropout": 0.0,
|
13 |
+
"bos_token_id": 0,
|
14 |
+
"classif_dropout": 0.0,
|
15 |
+
"classifier_dropout": 0.0,
|
16 |
+
"d_model": 1024,
|
17 |
+
"decoder_attention_heads": 16,
|
18 |
+
"decoder_ffn_dim": 4096,
|
19 |
+
"decoder_layerdrop": 0.0,
|
20 |
+
"decoder_layers": 12,
|
21 |
+
"dropout": 0.1,
|
22 |
+
"encoder_attention_heads": 16,
|
23 |
+
"encoder_ffn_dim": 4096,
|
24 |
+
"encoder_layerdrop": 0.0,
|
25 |
+
"encoder_layers": 12,
|
26 |
+
"eos_token_id": 2,
|
27 |
+
"forced_eos_token_id": 2,
|
28 |
+
"id2label": {
|
29 |
+
"0": "LABEL_0",
|
30 |
+
"1": "LABEL_1",
|
31 |
+
"2": "LABEL_2"
|
32 |
+
},
|
33 |
+
"init_std": 0.02,
|
34 |
+
"is_encoder_decoder": true,
|
35 |
+
"label2id": {
|
36 |
+
"LABEL_0": 0,
|
37 |
+
"LABEL_1": 1,
|
38 |
+
"LABEL_2": 2
|
39 |
+
},
|
40 |
+
"max_length": 1024,
|
41 |
+
"max_position_embeddings": 1024,
|
42 |
+
"model_type": "mbart",
|
43 |
+
"normalize_before": true,
|
44 |
+
"normalize_embedding": true,
|
45 |
+
"num_beams": 5,
|
46 |
+
"num_hidden_layers": 12,
|
47 |
+
"output_past": true,
|
48 |
+
"pad_token_id": 1,
|
49 |
+
"scale_embedding": true,
|
50 |
+
"static_position_embeddings": false,
|
51 |
+
"task_specific_params": {
|
52 |
+
"translation_en_to_ro": {
|
53 |
+
"decoder_start_token_id": 250020
|
54 |
+
}
|
55 |
+
},
|
56 |
+
"torch_dtype": "float32",
|
57 |
+
"transformers_version": "4.18.0.dev0",
|
58 |
+
"use_cache": true,
|
59 |
+
"vocab_size": 250028
|
60 |
+
}
|
eval/metric.first.answer.paragraph_answer.question.asahi417_qg_jaquad.default.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"validation": {"Bleu_1": 0.5274188532615222, "Bleu_2": 0.40547001889446466, "Bleu_3": 0.32870641278291424, "Bleu_4": 0.2738245851749123, "METEOR": 0.2777121508037796, "ROUGE_L": 0.4954107309166194, "BERTScore": 0.8121864595186834}, "test": {"Bleu_1": 0.5635177314613133, "Bleu_2": 0.4484887151523309, "Bleu_3": 0.3725830061624946, "Bleu_4": 0.3165545089603864, "METEOR": 0.2979579993001128, "ROUGE_L": 0.5269857300708057, "BERTScore": 0.8234216769082379}}
|
eval/metric.first.answer.paragraph_sentence.question.asahi417_qg_jaquad.default.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"validation": {"Bleu_1": 0.41289767224991436, "Bleu_2": 0.2720423272672471, "Bleu_3": 0.1957250595292615, "Bleu_4": 0.14738355023528266, "METEOR": 0.21284751944010977, "ROUGE_L": 0.369528108767733, "BERTScore": 0.7533362596795836}, "test": {"Bleu_1": 0.42775529057116984, "Bleu_2": 0.2884774471174323, "Bleu_3": 0.21153981432289004, "Bleu_4": 0.16247213431500857, "METEOR": 0.21570708781468395, "ROUGE_L": 0.37900152204076715, "BERTScore": 0.7565226740236662}}
|
eval/metric.first.answer.sentence_answer.question.asahi417_qg_jaquad.default.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"validation": {"Bleu_1": 0.5098431478245339, "Bleu_2": 0.3773787096493933, "Bleu_3": 0.29813690317079117, "Bleu_4": 0.24340900378639432, "METEOR": 0.25757993557189585, "ROUGE_L": 0.46746290063804813, "BERTScore": 0.8007896726401998}, "test": {"Bleu_1": 0.528847640898488, "Bleu_2": 0.4037607238010446, "Bleu_3": 0.325243398558213, "Bleu_4": 0.26980134208740525, "METEOR": 0.2703124113000665, "ROUGE_L": 0.48415290331541433, "BERTScore": 0.8069095808362925}}
|
eval/metric.first.sentence.paragraph_answer.question.asahi417_qg_jaquad.default.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"validation": {"Bleu_1": 0.5653464485221638, "Bleu_2": 0.43967994147576295, "Bleu_3": 0.35895528161075346, "Bleu_4": 0.300735672398057, "METEOR": 0.28940520664229974, "ROUGE_L": 0.5115044175185602, "BERTScore": 0.8140528204859925}, "test": {"Bleu_1": 0.5704558339292177, "Bleu_2": 0.4545072025586066, "Bleu_3": 0.37813802575407046, "Bleu_4": 0.32156776073917387, "METEOR": 0.2997311570800795, "ROUGE_L": 0.5294969429504184, "BERTScore": 0.8225831409256842}}
|
eval/metric.first.sentence.paragraph_sentence.question.asahi417_qg_jaquad.default.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"validation": {"Bleu_1": 0.444192919574916, "Bleu_2": 0.3009320852404868, "Bleu_3": 0.22108296707820238, "Bleu_4": 0.16905331937647144, "METEOR": 0.22477754954601992, "ROUGE_L": 0.38902050296312507, "BERTScore": 0.7538187135014712}, "test": {"Bleu_1": 0.4358009481211608, "Bleu_2": 0.2966029780762715, "Bleu_3": 0.21965696525531758, "Bleu_4": 0.16978831783003745, "METEOR": 0.22054162625943596, "ROUGE_L": 0.384733441084801, "BERTScore": 0.7570246487798075}}
|
eval/metric.first.sentence.sentence_answer.question.asahi417_qg_jaquad.default.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"validation": {"Bleu_1": 0.5438711960303027, "Bleu_2": 0.40616778323828523, "Bleu_3": 0.3222453946066634, "Bleu_4": 0.2638823617463151, "METEOR": 0.2654318831925565, "ROUGE_L": 0.47845867364019085, "BERTScore": 0.8015367157975019}, "test": {"Bleu_1": 0.5370018543288624, "Bleu_2": 0.4105511072733761, "Bleu_3": 0.3314172117527962, "Bleu_4": 0.2752336493570722, "METEOR": 0.27146682468881067, "ROUGE_L": 0.48628767610579504, "BERTScore": 0.8059127348544097}}
|
eval/metric.last.sentence.paragraph_answer.question.asahi417_qg_jaquad.default.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"validation": {"Bleu_1": 0.560798454169834, "Bleu_2": 0.4334982406116894, "Bleu_3": 0.35145042274996086, "Bleu_4": 0.2920406895113235, "METEOR": 0.28533850243511255, "ROUGE_L": 0.5033620789471159, "BERTScore": 0.8017919947126386}, "test": {"Bleu_1": 0.5703951320038019, "Bleu_2": 0.4545869832879288, "Bleu_3": 0.37821168770052727, "Bleu_4": 0.3215015646141678, "METEOR": 0.29983249913177096, "ROUGE_L": 0.5297967564121377, "BERTScore": 0.8199281026889388}}
|
eval/metric.last.sentence.paragraph_sentence.question.asahi417_qg_jaquad.default.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"validation": {"Bleu_1": 0.444192919574916, "Bleu_2": 0.3009320852404868, "Bleu_3": 0.22108296707820238, "Bleu_4": 0.16905331937647144, "METEOR": 0.22477754954601992, "ROUGE_L": 0.38902050296312507, "BERTScore": 0.7538187141221443}, "test": {"Bleu_1": 0.4358009481211608, "Bleu_2": 0.2966029780762715, "Bleu_3": 0.21965696525531758, "Bleu_4": 0.16978831783003745, "METEOR": 0.22054162625943596, "ROUGE_L": 0.384733441084801, "BERTScore": 0.7570246478709481}}
|
eval/metric.last.sentence.sentence_answer.question.asahi417_qg_jaquad.default.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"validation": {"Bleu_1": 0.5412026324962608, "Bleu_2": 0.40320140996355486, "Bleu_3": 0.31873027962314243, "Bleu_4": 0.2595208924533899, "METEOR": 0.2634667821165999, "ROUGE_L": 0.4729521573327688, "BERTScore": 0.7907279716428591}, "test": {"Bleu_1": 0.5363557461877765, "Bleu_2": 0.4095612608826678, "Bleu_3": 0.3303911837537535, "Bleu_4": 0.2741845003003641, "METEOR": 0.27135624221340554, "ROUGE_L": 0.4857398832857524, "BERTScore": 0.8032292966403755}}
|
eval/metric.long.sentence.paragraph_answer.question.asahi417_qg_jaquad.default.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"validation": {"Bleu_1": 0.568392587954497, "Bleu_2": 0.4399810436936495, "Bleu_3": 0.35752827444791824, "Bleu_4": 0.29794961909369816, "METEOR": 0.2894245825001034, "ROUGE_L": 0.5046402441783366, "BERTScore": 0.8041220551615078}, "test": {"Bleu_1": 0.5727270366793551, "Bleu_2": 0.45671197486848036, "Bleu_3": 0.38010223883854427, "Bleu_4": 0.32329513319692943, "METEOR": 0.30104662330221504, "ROUGE_L": 0.529778172126687, "BERTScore": 0.8209493007063701}}
|
eval/metric.long.sentence.paragraph_sentence.question.asahi417_qg_jaquad.default.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"validation": {"Bleu_1": 0.444192919574916, "Bleu_2": 0.3009320852404868, "Bleu_3": 0.22108296707820238, "Bleu_4": 0.16905331937647144, "METEOR": 0.22477754954601992, "ROUGE_L": 0.38902050296312507, "BERTScore": 0.7538187137016883}, "test": {"Bleu_1": 0.4358009481211608, "Bleu_2": 0.2966029780762715, "Bleu_3": 0.21965696525531758, "Bleu_4": 0.16978831783003745, "METEOR": 0.22054162625943596, "ROUGE_L": 0.384733441084801, "BERTScore": 0.7570246485484615}}
|
eval/metric.long.sentence.sentence_answer.question.asahi417_qg_jaquad.default.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"validation": {"Bleu_1": 0.546726786661128, "Bleu_2": 0.40787467049153825, "Bleu_3": 0.3234204966723321, "Bleu_4": 0.26436296421483324, "METEOR": 0.2667709544251534, "ROUGE_L": 0.4735086497558037, "BERTScore": 0.7918837651395141}, "test": {"Bleu_1": 0.5389613247046989, "Bleu_2": 0.41197955960752275, "Bleu_3": 0.3326403564569594, "Bleu_4": 0.2763045433897659, "METEOR": 0.2727392604807481, "ROUGE_L": 0.48620335548199917, "BERTScore": 0.8040031268387116}}
|
eval/metric.middle.sentence.paragraph_answer.question.asahi417_qg_jaquad.default.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"validation": {"Bleu_1": 0.5670116764334643, "Bleu_2": 0.4386024318926107, "Bleu_3": 0.3560806908794358, "Bleu_4": 0.2964071435445934, "METEOR": 0.2880823680012462, "ROUGE_L": 0.5049847900276104, "BERTScore": 0.8052403238302596}, "test": {"Bleu_1": 0.5718442516309373, "Bleu_2": 0.45553060373031484, "Bleu_3": 0.37876495507177377, "Bleu_4": 0.32191455028559773, "METEOR": 0.30041038325848496, "ROUGE_L": 0.5291365762132131, "BERTScore": 0.8209879956881293}}
|
eval/metric.middle.sentence.paragraph_sentence.question.asahi417_qg_jaquad.default.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"validation": {"Bleu_1": 0.444192919574916, "Bleu_2": 0.3009320852404868, "Bleu_3": 0.22108296707820238, "Bleu_4": 0.16905331937647144, "METEOR": 0.22477754954601992, "ROUGE_L": 0.38902050296312507, "BERTScore": 0.7538187146827523}, "test": {"Bleu_1": 0.4358009481211608, "Bleu_2": 0.2966029780762715, "Bleu_3": 0.21965696525531758, "Bleu_4": 0.16978831783003745, "METEOR": 0.22054162625943596, "ROUGE_L": 0.384733441084801, "BERTScore": 0.757024648862431}}
|
eval/metric.middle.sentence.sentence_answer.question.asahi417_qg_jaquad.default.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"validation": {"Bleu_1": 0.5451858021622283, "Bleu_2": 0.40648034266648575, "Bleu_3": 0.32177649302472533, "Bleu_4": 0.26252423986878454, "METEOR": 0.26590641056349956, "ROUGE_L": 0.47365322031696094, "BERTScore": 0.7936843107881504}, "test": {"Bleu_1": 0.5389894014957354, "Bleu_2": 0.4120842349125909, "Bleu_3": 0.33280095801210047, "Bleu_4": 0.2764301851607721, "METEOR": 0.2724499841190711, "ROUGE_L": 0.48640826088512706, "BERTScore": 0.8041495558691382}}
|
eval/metric.short.sentence.paragraph_answer.question.asahi417_qg_jaquad.default.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"validation": {"Bleu_1": 0.557809804850008, "Bleu_2": 0.43380152427453056, "Bleu_3": 0.35365962980866134, "Bleu_4": 0.29579381843940805, "METEOR": 0.2859066509322891, "ROUGE_L": 0.5116368297088284, "BERTScore": 0.8106889476261966}, "test": {"Bleu_1": 0.5681964855529469, "Bleu_2": 0.45265819504499333, "Bleu_3": 0.3765821362304531, "Bleu_4": 0.32009169158777667, "METEOR": 0.29849457496771464, "ROUGE_L": 0.5295264755470501, "BERTScore": 0.8213871886996572}}
|
eval/metric.short.sentence.paragraph_sentence.question.asahi417_qg_jaquad.default.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"validation": {"Bleu_1": 0.444192919574916, "Bleu_2": 0.3009320852404868, "Bleu_3": 0.22108296707820238, "Bleu_4": 0.16905331937647144, "METEOR": 0.22477754954601992, "ROUGE_L": 0.38902050296312507, "BERTScore": 0.7538187135214929}, "test": {"Bleu_1": 0.4358009481211608, "Bleu_2": 0.2966029780762715, "Bleu_3": 0.21965696525531758, "Bleu_4": 0.16978831783003745, "METEOR": 0.22054162625943596, "ROUGE_L": 0.384733441084801, "BERTScore": 0.7570246473091079}}
|
eval/metric.short.sentence.sentence_answer.question.asahi417_qg_jaquad.default.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"validation": {"Bleu_1": 0.5385066166374365, "Bleu_2": 0.40229554115215904, "Bleu_3": 0.3188553456157398, "Bleu_4": 0.260804026641529, "METEOR": 0.26276684732858946, "ROUGE_L": 0.479476439903538, "BERTScore": 0.7990169037017079}, "test": {"Bleu_1": 0.5342253872645868, "Bleu_2": 0.40795714823225876, "Bleu_3": 0.32900121284591144, "Bleu_4": 0.2730473052819015, "METEOR": 0.2701478309891577, "ROUGE_L": 0.4857541595418076, "BERTScore": 0.8048721521241796}}
|
eval/samples.test.hyp.paragraph_answer.question.asahi417_qg_jaquad.default.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
eval/samples.test.hyp.paragraph_sentence.question.asahi417_qg_jaquad.default.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
eval/samples.test.hyp.sentence_answer.question.asahi417_qg_jaquad.default.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
eval/samples.validation.hyp.paragraph_answer.question.asahi417_qg_jaquad.default.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
eval/samples.validation.hyp.paragraph_sentence.question.asahi417_qg_jaquad.default.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
eval/samples.validation.hyp.sentence_answer.question.asahi417_qg_jaquad.default.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1c3a54c3841329b088436aadc6b7c049176801a590e97072ec0ba6ea1a22295
|
3 |
+
size 2444604857
|
sentencepiece.bpe.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
|
3 |
+
size 5069051
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}, "additional_special_tokens": ["<hl>"]}
|
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a11892dd2e34180882f7d01c5f35400945ce4e3883fd977d42e9ba9d37dbece2
|
3 |
+
size 17088356
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "sep_token": "</s>", "cls_token": "<s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "src_lang": null, "tgt_lang": null, "additional_special_tokens": null, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "lmqg_output/mbart_large_cc25_jaquad/model_xiswyu/epoch_10", "tokenizer_class": "MBartTokenizer"}
|
trainer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dataset_path": "asahi417/qg_jaquad", "dataset_name": "default", "input_types": ["paragraph_answer"], "output_types": ["question"], "prefix_types": null, "model": "facebook/mbart-large-cc25", "max_length": 512, "max_length_output": 32, "epoch": 12, "batch": 64, "lr": 0.0001, "fp16": false, "random_seed": 1, "gradient_accumulation_steps": 1, "label_smoothing": 0.15}
|