Zekun Wu committed on
Commit
e5e9476
1 Parent(s): fb254e5

update new model

Browse files
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
config.json CHANGED
@@ -41,6 +41,6 @@
41
  "sinusoidal_pos_embds": false,
42
  "tie_weights_": true,
43
  "torch_dtype": "float32",
44
- "transformers_version": "4.16.2",
45
  "vocab_size": 30522
46
  }
 
41
  "sinusoidal_pos_embds": false,
42
  "tie_weights_": true,
43
  "torch_dtype": "float32",
44
+ "transformers_version": "4.31.0.dev0",
45
  "vocab_size": 30522
46
  }
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4fc301f3565f1f66dcc2f181d9282ed55ed442c52baa544c2581bdc8a61f2953
3
- size 531014313
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:189f1548d795d3644df5e1b50a29a1486d30f17b5cd55c1b8f8434168117565f
3
+ size 531015301
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:244b8ae0a16e42ca098bf10fcbd969892e6cbcf3e0a7f05a4ec52fb752db3ea1
3
- size 265517173
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8b8e20dea860c2d6daf7d75cc1ed1c2deaaf02b47d2669558a5a6439d4a5f
3
+ size 265512613
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c80dcae06b0c0549c4ad1ac0024ccb896e9245726737652c176de5d386a600c8
3
- size 13547
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e6b402c9f12d6e11514556247eb556837d54d1ce0345832ec974e0a7a504007
3
+ size 13553
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a7307711a3ffe0a038b57c3070c813da888b248e0bbf62ba06ac99ee91aa9d6
3
- size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1c1b5f8640b2c97cbad8c639ab7713c2c33f9df75adcc4050de3b5dad279f3c
3
+ size 627
special_tokens_map.json CHANGED
@@ -1 +1,7 @@
1
- {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json CHANGED
@@ -1,6 +1,11 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 512,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
  "padding": null,
10
  "added_tokens": [
11
  {
tokenizer_config.json CHANGED
@@ -1 +1,13 @@
1
- {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "distilbert-base-uncased", "tokenizer_class": "DistilBertTokenizer"}
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "clean_up_tokenization_spaces": true,
3
+ "cls_token": "[CLS]",
4
+ "do_lower_case": true,
5
+ "mask_token": "[MASK]",
6
+ "model_max_length": 512,
7
+ "pad_token": "[PAD]",
8
+ "sep_token": "[SEP]",
9
+ "strip_accents": null,
10
+ "tokenize_chinese_chars": true,
11
+ "tokenizer_class": "DistilBertTokenizer",
12
+ "unk_token": "[UNK]"
13
+ }
trainer_state.json CHANGED
@@ -1,202 +1,100 @@
1
  {
2
- "best_metric": 0.07052170485258102,
3
- "best_model_checkpoint": "token_level_model/best_model/checkpoint-948",
4
- "epoch": 12.0,
5
- "global_step": 3792,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 1.0,
12
- "eval_balanced accuracy": 0.5633085631676005,
13
- "eval_f1": 0.5303163390780957,
14
- "eval_loss": 0.09163307398557663,
15
- "eval_precision": 0.6548165588282941,
16
- "eval_recall": 0.5633085631676005,
17
- "eval_runtime": 5.3367,
18
- "eval_samples_per_second": 236.852,
19
- "eval_steps_per_second": 14.803,
20
- "step": 316
21
- },
22
- {
23
- "epoch": 1.58,
24
- "learning_rate": 1.7362869198312237e-05,
25
- "loss": 0.1839,
26
  "step": 500
27
  },
28
  {
29
- "epoch": 2.0,
30
- "eval_balanced accuracy": 0.7195902052549533,
31
- "eval_f1": 0.7255288809894066,
32
- "eval_loss": 0.0722324550151825,
33
- "eval_precision": 0.7454575741625619,
34
- "eval_recall": 0.7195902052549533,
35
- "eval_runtime": 5.3534,
36
- "eval_samples_per_second": 236.111,
37
- "eval_steps_per_second": 14.757,
38
- "step": 632
39
- },
40
- {
41
- "epoch": 3.0,
42
- "eval_balanced accuracy": 0.7147400555506803,
43
- "eval_f1": 0.7192305114894215,
44
- "eval_loss": 0.07052170485258102,
45
- "eval_precision": 0.7756273409712318,
46
- "eval_recall": 0.7147400555506803,
47
- "eval_runtime": 5.4755,
48
- "eval_samples_per_second": 230.847,
49
- "eval_steps_per_second": 14.428,
50
- "step": 948
51
- },
52
- {
53
- "epoch": 3.16,
54
- "learning_rate": 1.4725738396624474e-05,
55
- "loss": 0.0587,
56
  "step": 1000
57
  },
58
  {
59
- "epoch": 4.0,
60
- "eval_balanced accuracy": 0.7295185936273012,
61
- "eval_f1": 0.7296703416168215,
62
- "eval_loss": 0.07182055711746216,
63
- "eval_precision": 0.7401045017667032,
64
- "eval_recall": 0.7295185936273012,
65
- "eval_runtime": 5.3937,
66
- "eval_samples_per_second": 234.345,
67
- "eval_steps_per_second": 14.647,
68
- "step": 1264
69
- },
70
- {
71
- "epoch": 4.75,
72
- "learning_rate": 1.208860759493671e-05,
73
- "loss": 0.0376,
74
  "step": 1500
75
  },
76
  {
77
- "epoch": 5.0,
78
- "eval_balanced accuracy": 0.735330002568732,
79
- "eval_f1": 0.743746101471838,
80
- "eval_loss": 0.07561135292053223,
81
- "eval_precision": 0.758929084908083,
82
- "eval_recall": 0.735330002568732,
83
- "eval_runtime": 5.5304,
84
- "eval_samples_per_second": 228.555,
85
- "eval_steps_per_second": 14.285,
86
- "step": 1580
87
- },
88
- {
89
- "epoch": 6.0,
90
- "eval_balanced accuracy": 0.7487913019838482,
91
- "eval_f1": 0.7539294881409199,
92
- "eval_loss": 0.08333344757556915,
93
- "eval_precision": 0.7640729935139194,
94
- "eval_recall": 0.7487913019838482,
95
- "eval_runtime": 5.304,
96
- "eval_samples_per_second": 238.312,
97
- "eval_steps_per_second": 14.894,
98
- "step": 1896
99
- },
100
- {
101
- "epoch": 6.33,
102
- "learning_rate": 9.451476793248946e-06,
103
- "loss": 0.0239,
104
  "step": 2000
105
  },
106
  {
107
- "epoch": 7.0,
108
- "eval_balanced accuracy": 0.7495077050154062,
109
- "eval_f1": 0.7547220689413356,
110
- "eval_loss": 0.09176070988178253,
111
- "eval_precision": 0.7680132999431392,
112
- "eval_recall": 0.7495077050154062,
113
- "eval_runtime": 5.4894,
114
- "eval_samples_per_second": 230.262,
115
- "eval_steps_per_second": 14.391,
116
- "step": 2212
117
- },
118
- {
119
- "epoch": 7.91,
120
- "learning_rate": 6.814345991561182e-06,
121
- "loss": 0.0161,
122
  "step": 2500
123
  },
124
  {
125
- "epoch": 8.0,
126
- "eval_balanced accuracy": 0.7519480763726148,
127
- "eval_f1": 0.7441340002103095,
128
- "eval_loss": 0.10090441256761551,
129
- "eval_precision": 0.7421918161304624,
130
- "eval_recall": 0.7519480763726148,
131
- "eval_runtime": 5.4772,
132
- "eval_samples_per_second": 230.774,
133
- "eval_steps_per_second": 14.423,
134
- "step": 2528
135
- },
136
- {
137
- "epoch": 9.0,
138
- "eval_balanced accuracy": 0.7372305744818235,
139
- "eval_f1": 0.7418663358868686,
140
- "eval_loss": 0.10627683997154236,
141
- "eval_precision": 0.747694948865169,
142
- "eval_recall": 0.7372305744818235,
143
- "eval_runtime": 5.673,
144
- "eval_samples_per_second": 222.809,
145
- "eval_steps_per_second": 13.926,
146
- "step": 2844
147
- },
148
- {
149
- "epoch": 9.49,
150
- "learning_rate": 4.177215189873418e-06,
151
- "loss": 0.0107,
152
  "step": 3000
153
  },
154
  {
155
- "epoch": 10.0,
156
- "eval_balanced accuracy": 0.7454153105654866,
157
- "eval_f1": 0.7532278014935634,
158
- "eval_loss": 0.11291743814945221,
159
- "eval_precision": 0.7635910633921945,
160
- "eval_recall": 0.7454153105654866,
161
- "eval_runtime": 5.7497,
162
- "eval_samples_per_second": 219.839,
163
- "eval_steps_per_second": 13.74,
164
- "step": 3160
165
- },
166
- {
167
- "epoch": 11.0,
168
- "eval_balanced accuracy": 0.7422514651185799,
169
- "eval_f1": 0.7462413455365297,
170
- "eval_loss": 0.11802595853805542,
171
- "eval_precision": 0.7518280300030182,
172
- "eval_recall": 0.7422514651185799,
173
- "eval_runtime": 5.6277,
174
- "eval_samples_per_second": 224.602,
175
- "eval_steps_per_second": 14.038,
176
- "step": 3476
177
  },
178
  {
179
- "epoch": 11.08,
180
- "learning_rate": 1.5400843881856542e-06,
181
- "loss": 0.007,
182
- "step": 3500
183
  },
184
  {
185
- "epoch": 12.0,
186
- "eval_balanced accuracy": 0.7397630177088332,
187
- "eval_f1": 0.7454360643197575,
188
- "eval_loss": 0.11985792219638824,
189
- "eval_precision": 0.7526407260582226,
190
- "eval_recall": 0.7397630177088332,
191
- "eval_runtime": 5.7001,
192
- "eval_samples_per_second": 221.752,
193
- "eval_steps_per_second": 13.859,
194
- "step": 3792
195
  }
196
  ],
197
- "max_steps": 3792,
198
- "num_train_epochs": 12,
199
- "total_flos": 302819736843288.0,
200
  "trial_name": null,
201
  "trial_params": null
202
  }
 
1
  {
2
+ "best_metric": 0.03554883599281311,
3
+ "best_model_checkpoint": "MD_TL_best_model/checkpoint-4089",
4
+ "epoch": 3.0,
5
+ "global_step": 4089,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.37,
12
+ "learning_rate": 1.8777207141110298e-05,
13
+ "loss": 0.1048,
 
 
 
 
 
 
 
 
 
 
 
 
14
  "step": 500
15
  },
16
  {
17
+ "epoch": 0.73,
18
+ "learning_rate": 1.7554414282220594e-05,
19
+ "loss": 0.0502,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  "step": 1000
21
  },
22
  {
23
+ "epoch": 1.0,
24
+ "eval_balanced accuracy": 0.7151924088243289,
25
+ "eval_f1": 0.7168079195427047,
26
+ "eval_loss": 0.04138244688510895,
27
+ "eval_precision": 0.7342695204092529,
28
+ "eval_recall": 0.7151924088243289,
29
+ "eval_runtime": 9.9389,
30
+ "eval_samples_per_second": 548.553,
31
+ "eval_steps_per_second": 34.31,
32
+ "step": 1363
33
+ },
34
+ {
35
+ "epoch": 1.1,
36
+ "learning_rate": 1.633162142333089e-05,
37
+ "loss": 0.0401,
38
  "step": 1500
39
  },
40
  {
41
+ "epoch": 1.47,
42
+ "learning_rate": 1.5108828564441186e-05,
43
+ "loss": 0.0326,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  "step": 2000
45
  },
46
  {
47
+ "epoch": 1.83,
48
+ "learning_rate": 1.3886035705551482e-05,
49
+ "loss": 0.0311,
 
 
 
 
 
 
 
 
 
 
 
 
50
  "step": 2500
51
  },
52
  {
53
+ "epoch": 2.0,
54
+ "eval_balanced accuracy": 0.7398895952902634,
55
+ "eval_f1": 0.7495616067630219,
56
+ "eval_loss": 0.03639577701687813,
57
+ "eval_precision": 0.7655480981233173,
58
+ "eval_recall": 0.7398895952902634,
59
+ "eval_runtime": 4.3809,
60
+ "eval_samples_per_second": 1244.494,
61
+ "eval_steps_per_second": 77.838,
62
+ "step": 2726
63
+ },
64
+ {
65
+ "epoch": 2.2,
66
+ "learning_rate": 1.2663242846661777e-05,
67
+ "loss": 0.025,
 
 
 
 
 
 
 
 
 
 
 
 
68
  "step": 3000
69
  },
70
  {
71
+ "epoch": 2.57,
72
+ "learning_rate": 1.1440449987772073e-05,
73
+ "loss": 0.0222,
74
+ "step": 3500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  },
76
  {
77
+ "epoch": 2.93,
78
+ "learning_rate": 1.0217657128882368e-05,
79
+ "loss": 0.0222,
80
+ "step": 4000
81
  },
82
  {
83
+ "epoch": 3.0,
84
+ "eval_balanced accuracy": 0.7662314481801649,
85
+ "eval_f1": 0.7739129932274338,
86
+ "eval_loss": 0.03554883599281311,
87
+ "eval_precision": 0.7868185694908753,
88
+ "eval_recall": 0.7662314481801649,
89
+ "eval_runtime": 4.3603,
90
+ "eval_samples_per_second": 1250.36,
91
+ "eval_steps_per_second": 78.205,
92
+ "step": 4089
93
  }
94
  ],
95
+ "max_steps": 8178,
96
+ "num_train_epochs": 6,
97
+ "total_flos": 355494913244352.0,
98
  "trial_name": null,
99
  "trial_params": null
100
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da42e7baa76de3c12fa9e7243405605b228d1c4584da43c52ec2fc2947df75f5
3
- size 3055
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b6e9aa13ae78015b3f03ad4ff668efbdebb9803fbdeefab5cace9c334a8bc7e
3
+ size 3963