Upload 15 files
Browse files- .gitattributes +1 -0
- README.md +39 -1
- all_results.json +22 -0
- config.json +25 -0
- eval_nbest_predictions.json +3 -0
- eval_null_odds.json +0 -0
- eval_predictions.json +0 -0
- eval_results.json +17 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +7 -0
- tokenizer.json +0 -0
- tokenizer_config.json +14 -0
- train_results.json +8 -0
- trainer_state.json +181 -0
- training_args.bin +3 -0
- vocab.txt +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
eval_nbest_predictions.json filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
@@ -1,3 +1,41 @@
|
|
1 |
---
|
2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
tags:
|
3 |
+
- generated_from_trainer
|
4 |
+
datasets:
|
5 |
+
- squad_v2
|
6 |
+
model-index:
|
7 |
+
- name: photocatalysisbert-squad2
|
8 |
+
results: []
|
9 |
---
|
10 |
+
|
11 |
+
# photocatalysisbert-squad2
|
12 |
+
|
13 |
+
This model is a fine-tuned version of [CambridgeMolecularEngineering/photocatalysisbert](https://huggingface.co/CambridgeMolecularEngineering/photocatalysisbert) on the squad_v2 dataset.
|
14 |
+
|
15 |
+
## Training procedure
|
16 |
+
|
17 |
+
### Training hyperparameters
|
18 |
+
|
19 |
+
The following hyperparameters were used during training:
|
20 |
+
- learning_rate: 3e-05
|
21 |
+
- train_batch_size: 1
|
22 |
+
- eval_batch_size: 8
|
23 |
+
- seed: 0
|
24 |
+
- distributed_type: multi-GPU
|
25 |
+
- num_devices: 20
|
26 |
+
- total_train_batch_size: 20
|
27 |
+
- total_eval_batch_size: 160
|
28 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
29 |
+
- lr_scheduler_type: linear
|
30 |
+
- num_epochs: 2.0
|
31 |
+
|
32 |
+
### Framework versions
|
33 |
+
|
34 |
+
- Transformers 4.25.1
|
35 |
+
- Pytorch 1.12.0a0+git664058f
|
36 |
+
- Datasets 2.7.1
|
37 |
+
- Tokenizers 0.12.1
|
38 |
+
|
39 |
+
## Acknowledgements
|
40 |
+
|
41 |
+
This model was trained for the paper "How beneficial is pre-training on a narrow domain-specific corpus for information extraction about photocatalytic water splitting?" by Taketomo Isazawa and Jacqueline M. Cole. J.M.C. is grateful for the BASF/Royal Academy of Engineering Research Chair in Data-Driven Molecular Engineering of Functional Materials, which includes PhD studentship support (for T.I.). This Chair is also partly supported by the Science and Technology Facilities Council. They are also indebted to the Argonne Leadership Computing Facility, which is a DOE Office of Science Facility, for use of its research resources, under contract No. DE-AC02-06CH11357.
|
all_results.json
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 2.0,
|
3 |
+
"eval_HasAns_exact": 63.107287449392715,
|
4 |
+
"eval_HasAns_f1": 69.28034551777634,
|
5 |
+
"eval_HasAns_total": 5928,
|
6 |
+
"eval_NoAns_exact": 77.54415475189235,
|
7 |
+
"eval_NoAns_f1": 77.54415475189235,
|
8 |
+
"eval_NoAns_total": 5945,
|
9 |
+
"eval_best_exact": 70.34447907015918,
|
10 |
+
"eval_best_exact_thresh": 0.0,
|
11 |
+
"eval_best_f1": 73.42658875005311,
|
12 |
+
"eval_best_f1_thresh": 0.0,
|
13 |
+
"eval_exact": 70.33605659900614,
|
14 |
+
"eval_f1": 73.41816627890007,
|
15 |
+
"eval_samples": 12000,
|
16 |
+
"eval_total": 11873,
|
17 |
+
"train_loss": 1.2332238556411854,
|
18 |
+
"train_runtime": 1237.3872,
|
19 |
+
"train_samples": 130868,
|
20 |
+
"train_samples_per_second": 211.523,
|
21 |
+
"train_steps_per_second": 10.577
|
22 |
+
}
|
config.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "/grand/projects/SolarWindowsADSP/taketomo/polaris_outputs/final_models/MLM-final-scraped-g",
|
3 |
+
"architectures": [
|
4 |
+
"BertForQuestionAnswering"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"hidden_act": "gelu",
|
9 |
+
"hidden_dropout_prob": 0.1,
|
10 |
+
"hidden_size": 768,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 3072,
|
13 |
+
"layer_norm_eps": 1e-12,
|
14 |
+
"max_position_embeddings": 512,
|
15 |
+
"model_type": "bert",
|
16 |
+
"num_attention_heads": 12,
|
17 |
+
"num_hidden_layers": 12,
|
18 |
+
"pad_token_id": 0,
|
19 |
+
"position_embedding_type": "absolute",
|
20 |
+
"torch_dtype": "float32",
|
21 |
+
"transformers_version": "4.25.1",
|
22 |
+
"type_vocab_size": 2,
|
23 |
+
"use_cache": true,
|
24 |
+
"vocab_size": 30522
|
25 |
+
}
|
eval_nbest_predictions.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3689ad3f0e9ef33ecbba7f32b8017c1e16debecbcfb097a630f994359cee42b0
|
3 |
+
size 54996272
|
eval_null_odds.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
eval_predictions.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
eval_results.json
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 2.0,
|
3 |
+
"eval_HasAns_exact": 63.107287449392715,
|
4 |
+
"eval_HasAns_f1": 69.28034551777634,
|
5 |
+
"eval_HasAns_total": 5928,
|
6 |
+
"eval_NoAns_exact": 77.54415475189235,
|
7 |
+
"eval_NoAns_f1": 77.54415475189235,
|
8 |
+
"eval_NoAns_total": 5945,
|
9 |
+
"eval_best_exact": 70.34447907015918,
|
10 |
+
"eval_best_exact_thresh": 0.0,
|
11 |
+
"eval_best_f1": 73.42658875005311,
|
12 |
+
"eval_best_f1_thresh": 0.0,
|
13 |
+
"eval_exact": 70.33605659900614,
|
14 |
+
"eval_f1": 73.41816627890007,
|
15 |
+
"eval_samples": 12000,
|
16 |
+
"eval_total": 11873
|
17 |
+
}
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b9260a77caeda080b99a007b1b06db7b52418770a462f6f658694be8743c8404
|
3 |
+
size 435643185
|
special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"mask_token": "[MASK]",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"sep_token": "[SEP]",
|
6 |
+
"unk_token": "[UNK]"
|
7 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"do_lower_case": true,
|
4 |
+
"mask_token": "[MASK]",
|
5 |
+
"model_max_length": 512,
|
6 |
+
"name_or_path": "/grand/projects/SolarWindowsADSP/taketomo/polaris_outputs/final_models/MLM-final-scraped-g",
|
7 |
+
"pad_token": "[PAD]",
|
8 |
+
"sep_token": "[SEP]",
|
9 |
+
"special_tokens_map_file": null,
|
10 |
+
"strip_accents": null,
|
11 |
+
"tokenize_chinese_chars": true,
|
12 |
+
"tokenizer_class": "BertTokenizer",
|
13 |
+
"unk_token": "[UNK]"
|
14 |
+
}
|
train_results.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 2.0,
|
3 |
+
"train_loss": 1.2332238556411854,
|
4 |
+
"train_runtime": 1237.3872,
|
5 |
+
"train_samples": 130868,
|
6 |
+
"train_samples_per_second": 211.523,
|
7 |
+
"train_steps_per_second": 10.577
|
8 |
+
}
|
trainer_state.json
ADDED
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"global_step": 13088,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 0.08,
|
12 |
+
"learning_rate": 2.88539119804401e-05,
|
13 |
+
"loss": 2.3229,
|
14 |
+
"step": 500
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"epoch": 0.15,
|
18 |
+
"learning_rate": 2.7707823960880195e-05,
|
19 |
+
"loss": 1.7926,
|
20 |
+
"step": 1000
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"epoch": 0.23,
|
24 |
+
"learning_rate": 2.6561735941320294e-05,
|
25 |
+
"loss": 1.6682,
|
26 |
+
"step": 1500
|
27 |
+
},
|
28 |
+
{
|
29 |
+
"epoch": 0.31,
|
30 |
+
"learning_rate": 2.5415647921760392e-05,
|
31 |
+
"loss": 1.6135,
|
32 |
+
"step": 2000
|
33 |
+
},
|
34 |
+
{
|
35 |
+
"epoch": 0.38,
|
36 |
+
"learning_rate": 2.4269559902200488e-05,
|
37 |
+
"loss": 1.5343,
|
38 |
+
"step": 2500
|
39 |
+
},
|
40 |
+
{
|
41 |
+
"epoch": 0.46,
|
42 |
+
"learning_rate": 2.3123471882640587e-05,
|
43 |
+
"loss": 1.4642,
|
44 |
+
"step": 3000
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.53,
|
48 |
+
"learning_rate": 2.1977383863080685e-05,
|
49 |
+
"loss": 1.423,
|
50 |
+
"step": 3500
|
51 |
+
},
|
52 |
+
{
|
53 |
+
"epoch": 0.61,
|
54 |
+
"learning_rate": 2.083129584352078e-05,
|
55 |
+
"loss": 1.41,
|
56 |
+
"step": 4000
|
57 |
+
},
|
58 |
+
{
|
59 |
+
"epoch": 0.69,
|
60 |
+
"learning_rate": 1.9685207823960883e-05,
|
61 |
+
"loss": 1.3659,
|
62 |
+
"step": 4500
|
63 |
+
},
|
64 |
+
{
|
65 |
+
"epoch": 0.76,
|
66 |
+
"learning_rate": 1.8539119804400978e-05,
|
67 |
+
"loss": 1.3153,
|
68 |
+
"step": 5000
|
69 |
+
},
|
70 |
+
{
|
71 |
+
"epoch": 0.84,
|
72 |
+
"learning_rate": 1.7393031784841077e-05,
|
73 |
+
"loss": 1.2976,
|
74 |
+
"step": 5500
|
75 |
+
},
|
76 |
+
{
|
77 |
+
"epoch": 0.92,
|
78 |
+
"learning_rate": 1.6246943765281176e-05,
|
79 |
+
"loss": 1.2462,
|
80 |
+
"step": 6000
|
81 |
+
},
|
82 |
+
{
|
83 |
+
"epoch": 0.99,
|
84 |
+
"learning_rate": 1.5100855745721271e-05,
|
85 |
+
"loss": 1.2594,
|
86 |
+
"step": 6500
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 1.07,
|
90 |
+
"learning_rate": 1.395476772616137e-05,
|
91 |
+
"loss": 1.0087,
|
92 |
+
"step": 7000
|
93 |
+
},
|
94 |
+
{
|
95 |
+
"epoch": 1.15,
|
96 |
+
"learning_rate": 1.2808679706601467e-05,
|
97 |
+
"loss": 1.0272,
|
98 |
+
"step": 7500
|
99 |
+
},
|
100 |
+
{
|
101 |
+
"epoch": 1.22,
|
102 |
+
"learning_rate": 1.1662591687041566e-05,
|
103 |
+
"loss": 1.0037,
|
104 |
+
"step": 8000
|
105 |
+
},
|
106 |
+
{
|
107 |
+
"epoch": 1.3,
|
108 |
+
"learning_rate": 1.0516503667481663e-05,
|
109 |
+
"loss": 0.968,
|
110 |
+
"step": 8500
|
111 |
+
},
|
112 |
+
{
|
113 |
+
"epoch": 1.38,
|
114 |
+
"learning_rate": 9.37041564792176e-06,
|
115 |
+
"loss": 0.9763,
|
116 |
+
"step": 9000
|
117 |
+
},
|
118 |
+
{
|
119 |
+
"epoch": 1.45,
|
120 |
+
"learning_rate": 8.224327628361858e-06,
|
121 |
+
"loss": 0.9705,
|
122 |
+
"step": 9500
|
123 |
+
},
|
124 |
+
{
|
125 |
+
"epoch": 1.53,
|
126 |
+
"learning_rate": 7.078239608801956e-06,
|
127 |
+
"loss": 0.9548,
|
128 |
+
"step": 10000
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.6,
|
132 |
+
"learning_rate": 5.932151589242053e-06,
|
133 |
+
"loss": 0.955,
|
134 |
+
"step": 10500
|
135 |
+
},
|
136 |
+
{
|
137 |
+
"epoch": 1.68,
|
138 |
+
"learning_rate": 4.786063569682151e-06,
|
139 |
+
"loss": 0.932,
|
140 |
+
"step": 11000
|
141 |
+
},
|
142 |
+
{
|
143 |
+
"epoch": 1.76,
|
144 |
+
"learning_rate": 3.6399755501222492e-06,
|
145 |
+
"loss": 0.9331,
|
146 |
+
"step": 11500
|
147 |
+
},
|
148 |
+
{
|
149 |
+
"epoch": 1.83,
|
150 |
+
"learning_rate": 2.493887530562347e-06,
|
151 |
+
"loss": 0.9126,
|
152 |
+
"step": 12000
|
153 |
+
},
|
154 |
+
{
|
155 |
+
"epoch": 1.91,
|
156 |
+
"learning_rate": 1.347799511002445e-06,
|
157 |
+
"loss": 0.909,
|
158 |
+
"step": 12500
|
159 |
+
},
|
160 |
+
{
|
161 |
+
"epoch": 1.99,
|
162 |
+
"learning_rate": 2.0171149144254277e-07,
|
163 |
+
"loss": 0.89,
|
164 |
+
"step": 13000
|
165 |
+
},
|
166 |
+
{
|
167 |
+
"epoch": 2.0,
|
168 |
+
"step": 13088,
|
169 |
+
"total_flos": 2.5921698247213056e+16,
|
170 |
+
"train_loss": 1.2332238556411854,
|
171 |
+
"train_runtime": 1237.3872,
|
172 |
+
"train_samples_per_second": 211.523,
|
173 |
+
"train_steps_per_second": 10.577
|
174 |
+
}
|
175 |
+
],
|
176 |
+
"max_steps": 13088,
|
177 |
+
"num_train_epochs": 2,
|
178 |
+
"total_flos": 2.5921698247213056e+16,
|
179 |
+
"trial_name": null,
|
180 |
+
"trial_params": null
|
181 |
+
}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f3bd0dad9a536e43222030bd8964bf6a32329b194796221bddafad3f310376e4
|
3 |
+
size 4271
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|