mapama247 committed on
Commit
d05404c
·
1 Parent(s): 6addb45

upload ipc_level1_H model

Browse files
.config.json.swp ADDED
Binary file (12.3 kB). View file
 
config.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "../models/roberta-large/",
3
+ "architectures": [
4
+ "RobertaForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "eos_token_id": 2,
9
+ "finetuning_task": "ipc1",
10
+ "gradient_checkpointing": false,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 1024,
14
+ "id2label": {
15
+ "0": "01",
16
+ "1": "02",
17
+ "2": "03",
18
+ "3": "04",
19
+ "4": "05",
20
+ "5": "99"
21
+ },
22
+ "initializer_range": 0.02,
23
+ "intermediate_size": 4096,
24
+ "label2id": {
25
+ "01": 0,
26
+ "02": 1,
27
+ "03": 2,
28
+ "04": 3,
29
+ "05": 4,
30
+ "99": 5
31
+ },
32
+ "layer_norm_eps": 1e-05,
33
+ "max_position_embeddings": 514,
34
+ "model_type": "roberta",
35
+ "num_attention_heads": 16,
36
+ "num_hidden_layers": 24,
37
+ "pad_token_id": 1,
38
+ "position_embedding_type": "absolute",
39
+ "problem_type": "multi_label_classification",
40
+ "torch_dtype": "float32",
41
+ "transformers_version": "4.9.2",
42
+ "type_vocab_size": 1,
43
+ "use_cache": true,
44
+ "vocab_size": 50265
45
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d549fa5163630decdb3cca84499b6f6d5c21870499eb50a69d6f0a1c07e215a
3
+ size 1421627693
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66c8a6f313617d39fb13ca970de19c598085e0ec817ebac9d7a6b7c418de6c51
3
+ size 15523
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "add_prefix_space": false, "errors": "replace", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": "<mask>", "special_tokens_map_file": null, "name_or_path": "../models/roberta-large/", "tokenizer_class": "RobertaTokenizer"}
trainer_state.json ADDED
@@ -0,0 +1,254 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9076670631807177,
3
+ "best_model_checkpoint": "./output//roberta-large_ipc1_H_5_32_5e-6_0.01_0.06_07-08-22_06-40/checkpoint-28000",
4
+ "epoch": 0.2084682793177131,
5
+ "global_step": 28000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.01,
12
+ "learning_rate": 2.481759070829404e-07,
13
+ "loss": 0.4329,
14
+ "step": 2000
15
+ },
16
+ {
17
+ "epoch": 0.01,
18
+ "eval_accuracy": 0.5884747561984958,
19
+ "eval_f1": 0.6872181807983729,
20
+ "eval_loss": 0.2462294101715088,
21
+ "eval_roc_auc": 0.7804816717606253,
22
+ "eval_runtime": 6569.6147,
23
+ "eval_samples_per_second": 33.839,
24
+ "eval_steps_per_second": 2.115,
25
+ "step": 2000
26
+ },
27
+ {
28
+ "epoch": 0.03,
29
+ "learning_rate": 4.963518141658809e-07,
30
+ "loss": 0.1951,
31
+ "step": 4000
32
+ },
33
+ {
34
+ "epoch": 0.03,
35
+ "eval_accuracy": 0.7922514304221094,
36
+ "eval_f1": 0.8496544620977544,
37
+ "eval_loss": 0.14517635107040405,
38
+ "eval_roc_auc": 0.895243709044785,
39
+ "eval_runtime": 6569.3584,
40
+ "eval_samples_per_second": 33.841,
41
+ "eval_steps_per_second": 2.115,
42
+ "step": 4000
43
+ },
44
+ {
45
+ "epoch": 0.04,
46
+ "learning_rate": 7.445277212488212e-07,
47
+ "loss": 0.1382,
48
+ "step": 6000
49
+ },
50
+ {
51
+ "epoch": 0.04,
52
+ "eval_accuracy": 0.8181744575191623,
53
+ "eval_f1": 0.8705377939577977,
54
+ "eval_loss": 0.12493952363729477,
55
+ "eval_roc_auc": 0.9109245202637991,
56
+ "eval_runtime": 6567.6398,
57
+ "eval_samples_per_second": 33.85,
58
+ "eval_steps_per_second": 2.116,
59
+ "step": 6000
60
+ },
61
+ {
62
+ "epoch": 0.06,
63
+ "learning_rate": 9.927036283317617e-07,
64
+ "loss": 0.1226,
65
+ "step": 8000
66
+ },
67
+ {
68
+ "epoch": 0.06,
69
+ "eval_accuracy": 0.8257044154161719,
70
+ "eval_f1": 0.8795041743265541,
71
+ "eval_loss": 0.11273977905511856,
72
+ "eval_roc_auc": 0.9185852786225337,
73
+ "eval_runtime": 6573.4579,
74
+ "eval_samples_per_second": 33.82,
75
+ "eval_steps_per_second": 2.114,
76
+ "step": 8000
77
+ },
78
+ {
79
+ "epoch": 0.07,
80
+ "learning_rate": 1.240879535414702e-06,
81
+ "loss": 0.1121,
82
+ "step": 10000
83
+ },
84
+ {
85
+ "epoch": 0.07,
86
+ "eval_accuracy": 0.8326721004714096,
87
+ "eval_f1": 0.8861235840391877,
88
+ "eval_loss": 0.10739490389823914,
89
+ "eval_roc_auc": 0.9245890326912541,
90
+ "eval_runtime": 6572.6062,
91
+ "eval_samples_per_second": 33.824,
92
+ "eval_steps_per_second": 2.114,
93
+ "step": 10000
94
+ },
95
+ {
96
+ "epoch": 0.09,
97
+ "learning_rate": 1.4890554424976424e-06,
98
+ "loss": 0.1059,
99
+ "step": 12000
100
+ },
101
+ {
102
+ "epoch": 0.09,
103
+ "eval_accuracy": 0.8378045269711036,
104
+ "eval_f1": 0.8920632767595246,
105
+ "eval_loss": 0.1017272025346756,
106
+ "eval_roc_auc": 0.929144259699995,
107
+ "eval_runtime": 6575.5269,
108
+ "eval_samples_per_second": 33.809,
109
+ "eval_steps_per_second": 2.113,
110
+ "step": 12000
111
+ },
112
+ {
113
+ "epoch": 0.1,
114
+ "learning_rate": 1.7372313495805828e-06,
115
+ "loss": 0.1024,
116
+ "step": 14000
117
+ },
118
+ {
119
+ "epoch": 0.1,
120
+ "eval_accuracy": 0.8408677894130772,
121
+ "eval_f1": 0.893626255792282,
122
+ "eval_loss": 0.09984249621629715,
123
+ "eval_roc_auc": 0.928852296252917,
124
+ "eval_runtime": 6574.5786,
125
+ "eval_samples_per_second": 33.814,
126
+ "eval_steps_per_second": 2.113,
127
+ "step": 14000
128
+ },
129
+ {
130
+ "epoch": 0.12,
131
+ "learning_rate": 1.9854072566635234e-06,
132
+ "loss": 0.0974,
133
+ "step": 16000
134
+ },
135
+ {
136
+ "epoch": 0.12,
137
+ "eval_accuracy": 0.8442549210119112,
138
+ "eval_f1": 0.898560228806424,
139
+ "eval_loss": 0.09467251598834991,
140
+ "eval_roc_auc": 0.9336472029116307,
141
+ "eval_runtime": 6575.3988,
142
+ "eval_samples_per_second": 33.81,
143
+ "eval_steps_per_second": 2.113,
144
+ "step": 16000
145
+ },
146
+ {
147
+ "epoch": 0.13,
148
+ "learning_rate": 2.2335831637464636e-06,
149
+ "loss": 0.0964,
150
+ "step": 18000
151
+ },
152
+ {
153
+ "epoch": 0.13,
154
+ "eval_accuracy": 0.8498776494296304,
155
+ "eval_f1": 0.9009191664807081,
156
+ "eval_loss": 0.09292563796043396,
157
+ "eval_roc_auc": 0.9335645456901214,
158
+ "eval_runtime": 6575.3606,
159
+ "eval_samples_per_second": 33.81,
160
+ "eval_steps_per_second": 2.113,
161
+ "step": 18000
162
+ },
163
+ {
164
+ "epoch": 0.15,
165
+ "learning_rate": 2.481759070829404e-06,
166
+ "loss": 0.0954,
167
+ "step": 20000
168
+ },
169
+ {
170
+ "epoch": 0.15,
171
+ "eval_accuracy": 0.8467199251502393,
172
+ "eval_f1": 0.8968800725980649,
173
+ "eval_loss": 0.09573328495025635,
174
+ "eval_roc_auc": 0.9303951610003275,
175
+ "eval_runtime": 6576.7864,
176
+ "eval_samples_per_second": 33.803,
177
+ "eval_steps_per_second": 2.113,
178
+ "step": 20000
179
+ },
180
+ {
181
+ "epoch": 0.16,
182
+ "learning_rate": 2.7299349779123447e-06,
183
+ "loss": 0.0926,
184
+ "step": 22000
185
+ },
186
+ {
187
+ "epoch": 0.16,
188
+ "eval_accuracy": 0.8511686278743388,
189
+ "eval_f1": 0.9032656201127709,
190
+ "eval_loss": 0.09094775468111038,
191
+ "eval_roc_auc": 0.9367924962877248,
192
+ "eval_runtime": 6579.9338,
193
+ "eval_samples_per_second": 33.786,
194
+ "eval_steps_per_second": 2.112,
195
+ "step": 22000
196
+ },
197
+ {
198
+ "epoch": 0.18,
199
+ "learning_rate": 2.978110884995285e-06,
200
+ "loss": 0.0916,
201
+ "step": 24000
202
+ },
203
+ {
204
+ "epoch": 0.18,
205
+ "eval_accuracy": 0.8545737522041096,
206
+ "eval_f1": 0.9044808139596631,
207
+ "eval_loss": 0.08910883218050003,
208
+ "eval_roc_auc": 0.9358689602259472,
209
+ "eval_runtime": 6577.1239,
210
+ "eval_samples_per_second": 33.801,
211
+ "eval_steps_per_second": 2.113,
212
+ "step": 24000
213
+ },
214
+ {
215
+ "epoch": 0.19,
216
+ "learning_rate": 3.2262867920782255e-06,
217
+ "loss": 0.0907,
218
+ "step": 26000
219
+ },
220
+ {
221
+ "epoch": 0.19,
222
+ "eval_accuracy": 0.8548076577062866,
223
+ "eval_f1": 0.9047564303992575,
224
+ "eval_loss": 0.08734705299139023,
225
+ "eval_roc_auc": 0.9352966950206927,
226
+ "eval_runtime": 6579.4149,
227
+ "eval_samples_per_second": 33.789,
228
+ "eval_steps_per_second": 2.112,
229
+ "step": 26000
230
+ },
231
+ {
232
+ "epoch": 0.21,
233
+ "learning_rate": 3.4744626991611656e-06,
234
+ "loss": 0.09,
235
+ "step": 28000
236
+ },
237
+ {
238
+ "epoch": 0.21,
239
+ "eval_accuracy": 0.8557117924358559,
240
+ "eval_f1": 0.9076670631807177,
241
+ "eval_loss": 0.08580958843231201,
242
+ "eval_roc_auc": 0.9401102386424043,
243
+ "eval_runtime": 6641.2181,
244
+ "eval_samples_per_second": 33.475,
245
+ "eval_steps_per_second": 2.092,
246
+ "step": 28000
247
+ }
248
+ ],
249
+ "max_steps": 671565,
250
+ "num_train_epochs": 5,
251
+ "total_flos": 8.35021786841088e+17,
252
+ "trial_name": null,
253
+ "trial_params": null
254
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd1257f22ca922923c53551e8989f6950a4d60387f9706ea4bc926ee079d2a65
3
+ size 2735
vocab.json ADDED
The diff for this file is too large to render. See raw diff