ti250 committed
Commit 596a612
1 Parent(s): 61f355a

Upload 15 files

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ eval_nbest_predictions.json filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,41 @@
  ---
- license: mit
+ tags:
+ - generated_from_trainer
+ datasets:
+ - squad_v2
+ model-index:
+ - name: photocatalysisbert-squad2
+   results: []
  ---
+
+ # photocatalysisbert-squad2
+
+ This model is a fine-tuned version of [CambridgeMolecularEngineering/photocatalysisbert](https://huggingface.co/CambridgeMolecularEngineering/photocatalysisbert) on the squad_v2 dataset.
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 3e-05
+ - train_batch_size: 1
+ - eval_batch_size: 8
+ - seed: 0
+ - distributed_type: multi-GPU
+ - num_devices: 20
+ - total_train_batch_size: 20
+ - total_eval_batch_size: 160
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - num_epochs: 2.0
+
+ ### Framework versions
+
+ - Transformers 4.25.1
+ - Pytorch 1.12.0a0+git664058f
+ - Datasets 2.7.1
+ - Tokenizers 0.12.1
+
+ ## Acknowledgements
+
+ This model was trained for the paper "How beneficial is pre-training on a narrow domain-specific corpus for information extraction about photocatalytic water splitting?" by Taketomo Isazawa and Jacqueline M. Cole. J.M.C. is grateful for the BASF/Royal Academy of Engineering Research Chair in Data-Driven Molecular Engineering of Functional Materials, which includes PhD studentship support (for T.I.). This Chair is also partly supported by the Science and Technology Facilities Council. They are also indebted to the Argonne Leadership Computing Facility, which is a DOE Office of Science Facility, for use of its research resources, under contract No. DE-AC02-06CH11357.
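A minimal usage sketch for the model card above. The repo ID is an assumption inferred from the model name (only "photocatalysisbert-squad2" appears in the card); the question/context pair is illustrative, not from the paper.

```python
# Minimal usage sketch. Assumption: the model is hosted under the repo ID
# below; only the name "photocatalysisbert-squad2" is given in the card.
from transformers import pipeline

qa = pipeline(
    "question-answering",
    model="CambridgeMolecularEngineering/photocatalysisbert-squad2",
)

# Illustrative inputs only (not taken from the paper).
result = qa(
    question="Which co-catalyst was loaded onto the TiO2 photocatalyst?",
    context="The Pt-loaded TiO2 photocatalyst showed enhanced hydrogen "
            "evolution under UV irradiation.",
    handle_impossible_answer=True,  # trained on SQuAD v2, which includes
)                                   # unanswerable questions

print(result)  # {'score': ..., 'start': ..., 'end': ..., 'answer': ...}
```

Because squad_v2 contains unanswerable questions, an empty answer returned with a high score signals "no answer in this context".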
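For readers who want to reproduce the setup, here is a hedged sketch of how the card's hyperparameters map onto `TrainingArguments`. This is not the authors' launch script; the distributed values (num_devices: 20, total batch sizes 20/160) come from launching across 20 GPUs (e.g. with torchrun), not from any single argument.

```python
# Sketch: the card's per-device hyperparameters as TrainingArguments.
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="photocatalysisbert-squad2",  # hypothetical output path
    learning_rate=3e-05,
    per_device_train_batch_size=1,   # x 20 devices -> total_train_batch_size 20
    per_device_eval_batch_size=8,    # x 20 devices -> total_eval_batch_size 160
    seed=0,
    num_train_epochs=2.0,
    lr_scheduler_type="linear",
    # Adam betas/epsilon below are the library defaults the card reports.
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-08,
)
```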
all_results.json ADDED
@@ -0,0 +1,22 @@
+ {
+     "epoch": 2.0,
+     "eval_HasAns_exact": 63.107287449392715,
+     "eval_HasAns_f1": 69.28034551777634,
+     "eval_HasAns_total": 5928,
+     "eval_NoAns_exact": 77.54415475189235,
+     "eval_NoAns_f1": 77.54415475189235,
+     "eval_NoAns_total": 5945,
+     "eval_best_exact": 70.34447907015918,
+     "eval_best_exact_thresh": 0.0,
+     "eval_best_f1": 73.42658875005311,
+     "eval_best_f1_thresh": 0.0,
+     "eval_exact": 70.33605659900614,
+     "eval_f1": 73.41816627890007,
+     "eval_samples": 12000,
+     "eval_total": 11873,
+     "train_loss": 1.2332238556411854,
+     "train_runtime": 1237.3872,
+     "train_samples": 130868,
+     "train_samples_per_second": 211.523,
+     "train_steps_per_second": 10.577
+ }
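A quick arithmetic check, using only the numbers in this file: the overall SQuAD v2 scores are the sample-weighted averages of the HasAns (5928 questions) and NoAns (5945 questions) subsets.

```python
# Verify eval_exact and eval_f1 as weighted averages of the subset scores.
has_total, no_total = 5928, 5945
has_exact, no_exact = 63.107287449392715, 77.54415475189235
has_f1, no_f1 = 69.28034551777634, 77.54415475189235

total = has_total + no_total  # 11873 == eval_total
exact = (has_exact * has_total + no_exact * no_total) / total
f1 = (has_f1 * has_total + no_f1 * no_total) / total

print(round(exact, 6))  # 70.336057 -> matches eval_exact
print(round(f1, 6))     # 73.418166 -> matches eval_f1
```

Note that eval_NoAns_f1 equals eval_NoAns_exact by construction: on unanswerable questions, F1 is 1 when the model predicts "no answer" and 0 otherwise.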
config.json ADDED
@@ -0,0 +1,25 @@
+ {
+     "_name_or_path": "/grand/projects/SolarWindowsADSP/taketomo/polaris_outputs/final_models/MLM-final-scraped-g",
+     "architectures": [
+         "BertForQuestionAnswering"
+     ],
+     "attention_probs_dropout_prob": 0.1,
+     "classifier_dropout": null,
+     "hidden_act": "gelu",
+     "hidden_dropout_prob": 0.1,
+     "hidden_size": 768,
+     "initializer_range": 0.02,
+     "intermediate_size": 3072,
+     "layer_norm_eps": 1e-12,
+     "max_position_embeddings": 512,
+     "model_type": "bert",
+     "num_attention_heads": 12,
+     "num_hidden_layers": 12,
+     "pad_token_id": 0,
+     "position_embedding_type": "absolute",
+     "torch_dtype": "float32",
+     "transformers_version": "4.25.1",
+     "type_vocab_size": 2,
+     "use_cache": true,
+     "vocab_size": 30522
+ }
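This is a standard BERT-base configuration (12 layers, 12 heads, hidden size 768) with a question-answering head. A hedged sketch of reading it back with the transformers 4.25-era API; the local directory name is hypothetical.

```python
# Sketch: load the config above and build the architecture it names.
# Assumption: the repository files were downloaded to ./photocatalysisbert-squad2.
from transformers import AutoConfig, AutoModelForQuestionAnswering

config = AutoConfig.from_pretrained("./photocatalysisbert-squad2")
assert config.model_type == "bert"
assert (config.hidden_size, config.num_hidden_layers) == (768, 12)

# "architectures" lists BertForQuestionAnswering: a BERT encoder plus a
# linear span head producing start/end logits. from_config builds it with
# random weights; from_pretrained would also load pytorch_model.bin.
model = AutoModelForQuestionAnswering.from_config(config)
```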
eval_nbest_predictions.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3689ad3f0e9ef33ecbba7f32b8017c1e16debecbcfb097a630f994359cee42b0
+ size 54996272
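These three lines are a Git LFS pointer, not the predictions themselves: the ~55 MB JSON lives in LFS storage and is addressed by its SHA-256. A small sketch of checking a locally fetched copy (e.g. after `git lfs pull`) against the pointer fields:

```python
# Verify a local file against the LFS pointer's oid and size fields.
import hashlib
from pathlib import Path

data = Path("eval_nbest_predictions.json").read_bytes()

assert len(data) == 54996272, "must match the pointer's size field"
assert hashlib.sha256(data).hexdigest() == (
    "3689ad3f0e9ef33ecbba7f32b8017c1e16debecbcfb097a630f994359cee42b0"
)
print("file matches the LFS pointer")
```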
eval_null_odds.json ADDED
The diff for this file is too large to render.
eval_predictions.json ADDED
The diff for this file is too large to render.
eval_results.json ADDED
@@ -0,0 +1,17 @@
+ {
+     "epoch": 2.0,
+     "eval_HasAns_exact": 63.107287449392715,
+     "eval_HasAns_f1": 69.28034551777634,
+     "eval_HasAns_total": 5928,
+     "eval_NoAns_exact": 77.54415475189235,
+     "eval_NoAns_f1": 77.54415475189235,
+     "eval_NoAns_total": 5945,
+     "eval_best_exact": 70.34447907015918,
+     "eval_best_exact_thresh": 0.0,
+     "eval_best_f1": 73.42658875005311,
+     "eval_best_f1_thresh": 0.0,
+     "eval_exact": 70.33605659900614,
+     "eval_f1": 73.41816627890007,
+     "eval_samples": 12000,
+     "eval_total": 11873
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b9260a77caeda080b99a007b1b06db7b52418770a462f6f658694be8743c8404
+ size 435643185
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+     "cls_token": "[CLS]",
+     "mask_token": "[MASK]",
+     "pad_token": "[PAD]",
+     "sep_token": "[SEP]",
+     "unk_token": "[UNK]"
+ }
tokenizer.json ADDED
The diff for this file is too large to render.
tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
+ {
+     "cls_token": "[CLS]",
+     "do_lower_case": true,
+     "mask_token": "[MASK]",
+     "model_max_length": 512,
+     "name_or_path": "/grand/projects/SolarWindowsADSP/taketomo/polaris_outputs/final_models/MLM-final-scraped-g",
+     "pad_token": "[PAD]",
+     "sep_token": "[SEP]",
+     "special_tokens_map_file": null,
+     "strip_accents": null,
+     "tokenize_chinese_chars": true,
+     "tokenizer_class": "BertTokenizer",
+     "unk_token": "[UNK]"
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+     "epoch": 2.0,
+     "train_loss": 1.2332238556411854,
+     "train_runtime": 1237.3872,
+     "train_samples": 130868,
+     "train_samples_per_second": 211.523,
+     "train_steps_per_second": 10.577
+ }
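A consistency check on these training stats: the throughput figures follow directly from the sample count, epochs, step count, and runtime (the step count 13088 appears in trainer_state.json below).

```python
# Recompute the throughput numbers from the raw quantities.
train_samples = 130868
num_epochs = 2.0
train_runtime = 1237.3872   # seconds
global_steps = 13088        # from trainer_state.json below

print(round(train_samples * num_epochs / train_runtime, 3))  # ~211.523
print(round(global_steps / train_runtime, 3))                # ~10.577

# Step count also matches the card's total_train_batch_size of 20:
# 130868 / 20 = 6543.4 -> 6544 steps per epoch, 2 * 6544 = 13088.
```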
trainer_state.json ADDED
@@ -0,0 +1,181 @@
+ {
+     "best_metric": null,
+     "best_model_checkpoint": null,
+     "epoch": 2.0,
+     "global_step": 13088,
+     "is_hyper_param_search": false,
+     "is_local_process_zero": true,
+     "is_world_process_zero": true,
+     "log_history": [
+         {
+             "epoch": 0.08,
+             "learning_rate": 2.88539119804401e-05,
+             "loss": 2.3229,
+             "step": 500
+         },
+         {
+             "epoch": 0.15,
+             "learning_rate": 2.7707823960880195e-05,
+             "loss": 1.7926,
+             "step": 1000
+         },
+         {
+             "epoch": 0.23,
+             "learning_rate": 2.6561735941320294e-05,
+             "loss": 1.6682,
+             "step": 1500
+         },
+         {
+             "epoch": 0.31,
+             "learning_rate": 2.5415647921760392e-05,
+             "loss": 1.6135,
+             "step": 2000
+         },
+         {
+             "epoch": 0.38,
+             "learning_rate": 2.4269559902200488e-05,
+             "loss": 1.5343,
+             "step": 2500
+         },
+         {
+             "epoch": 0.46,
+             "learning_rate": 2.3123471882640587e-05,
+             "loss": 1.4642,
+             "step": 3000
+         },
+         {
+             "epoch": 0.53,
+             "learning_rate": 2.1977383863080685e-05,
+             "loss": 1.423,
+             "step": 3500
+         },
+         {
+             "epoch": 0.61,
+             "learning_rate": 2.083129584352078e-05,
+             "loss": 1.41,
+             "step": 4000
+         },
+         {
+             "epoch": 0.69,
+             "learning_rate": 1.9685207823960883e-05,
+             "loss": 1.3659,
+             "step": 4500
+         },
+         {
+             "epoch": 0.76,
+             "learning_rate": 1.8539119804400978e-05,
+             "loss": 1.3153,
+             "step": 5000
+         },
+         {
+             "epoch": 0.84,
+             "learning_rate": 1.7393031784841077e-05,
+             "loss": 1.2976,
+             "step": 5500
+         },
+         {
+             "epoch": 0.92,
+             "learning_rate": 1.6246943765281176e-05,
+             "loss": 1.2462,
+             "step": 6000
+         },
+         {
+             "epoch": 0.99,
+             "learning_rate": 1.5100855745721271e-05,
+             "loss": 1.2594,
+             "step": 6500
+         },
+         {
+             "epoch": 1.07,
+             "learning_rate": 1.395476772616137e-05,
+             "loss": 1.0087,
+             "step": 7000
+         },
+         {
+             "epoch": 1.15,
+             "learning_rate": 1.2808679706601467e-05,
+             "loss": 1.0272,
+             "step": 7500
+         },
+         {
+             "epoch": 1.22,
+             "learning_rate": 1.1662591687041566e-05,
+             "loss": 1.0037,
+             "step": 8000
+         },
+         {
+             "epoch": 1.3,
+             "learning_rate": 1.0516503667481663e-05,
+             "loss": 0.968,
+             "step": 8500
+         },
+         {
+             "epoch": 1.38,
+             "learning_rate": 9.37041564792176e-06,
+             "loss": 0.9763,
+             "step": 9000
+         },
+         {
+             "epoch": 1.45,
+             "learning_rate": 8.224327628361858e-06,
+             "loss": 0.9705,
+             "step": 9500
+         },
+         {
+             "epoch": 1.53,
+             "learning_rate": 7.078239608801956e-06,
+             "loss": 0.9548,
+             "step": 10000
+         },
+         {
+             "epoch": 1.6,
+             "learning_rate": 5.932151589242053e-06,
+             "loss": 0.955,
+             "step": 10500
+         },
+         {
+             "epoch": 1.68,
+             "learning_rate": 4.786063569682151e-06,
+             "loss": 0.932,
+             "step": 11000
+         },
+         {
+             "epoch": 1.76,
+             "learning_rate": 3.6399755501222492e-06,
+             "loss": 0.9331,
+             "step": 11500
+         },
+         {
+             "epoch": 1.83,
+             "learning_rate": 2.493887530562347e-06,
+             "loss": 0.9126,
+             "step": 12000
+         },
+         {
+             "epoch": 1.91,
+             "learning_rate": 1.347799511002445e-06,
+             "loss": 0.909,
+             "step": 12500
+         },
+         {
+             "epoch": 1.99,
+             "learning_rate": 2.0171149144254277e-07,
+             "loss": 0.89,
+             "step": 13000
+         },
+         {
+             "epoch": 2.0,
+             "step": 13088,
+             "total_flos": 2.5921698247213056e+16,
+             "train_loss": 1.2332238556411854,
+             "train_runtime": 1237.3872,
+             "train_samples_per_second": 211.523,
+             "train_steps_per_second": 10.577
+         }
+     ],
+     "max_steps": 13088,
+     "num_train_epochs": 2,
+     "total_flos": 2.5921698247213056e+16,
+     "trial_name": null,
+     "trial_params": null
+ }
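The logged learning rates match the linear scheduler declared in the model card: lr decays linearly from 3e-05 toward 0 over max_steps = 13088 (the step-500 value is consistent with zero warmup). A sketch reproducing a few logged values:

```python
# Reproduce the logged learning rates under a warmup-free linear schedule:
# lr(step) = base_lr * (1 - step / max_steps)
base_lr, max_steps = 3e-05, 13088

def linear_lr(step: int) -> float:
    return base_lr * (1 - step / max_steps)

print(linear_lr(500))    # ~2.8854e-05, logged 2.88539119804401e-05
print(linear_lr(6500))   # ~1.5101e-05, logged 1.5100855745721271e-05
print(linear_lr(13000))  # ~2.0171e-07, logged 2.0171149144254277e-07
```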
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f3bd0dad9a536e43222030bd8964bf6a32329b194796221bddafad3f310376e4
+ size 4271
vocab.txt ADDED
The diff for this file is too large to render.