PaulTran committed on
Commit 187dea8
1 Parent(s): 270714c

Upload 12 files

added_tokens.json ADDED
@@ -0,0 +1,3 @@
+ {
+   "<mask>": 64000
+ }
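This single entry registers the <mask> token at id 64000, one past PhoBERT's base vocabulary (the config.json below reports "vocab_size": 64001). A minimal sketch to confirm the mapping; "./PhoBertPunc" is a placeholder for a local clone of this repository, not part of the commit:

# Sketch only; the directory path is an assumption, not part of this commit.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./PhoBertPunc")
# added_tokens.json maps <mask> to 64000, one past the base vocabulary.
print(tokenizer.mask_token, tokenizer.mask_token_id)  # expected: <mask> 64000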
bpe.codes ADDED
The diff for this file is too large to render. See raw diff
 
config.json ADDED
@@ -0,0 +1,44 @@
+ {
+   "_name_or_path": "vinai/phobert-base",
+   "architectures": [
+     "RobertaForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 0,
+   "classifier_dropout": null,
+   "eos_token_id": 2,
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "Nghị luận",
+     "1": "Thuyết minh",
+     "2": "Biểu cảm",
+     "3": "Tự sự",
+     "4": "Miêu tả"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "label2id": {
+     "Nghị luận": 0,
+     "Thuyết minh": 1,
+     "Biểu cảm": 2,
+     "Tự sự": 3,
+     "Miêu tả": 4
+   },
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 258,
+   "model_type": "roberta",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 1,
+   "position_embedding_type": "absolute",
+   "problem_type": "multi_label_classification",
+   "tokenizer_class": "PhobertTokenizer",
+   "torch_dtype": "float32",
+   "transformers_version": "4.24.0",
+   "type_vocab_size": 1,
+   "use_cache": true,
+   "vocab_size": 64001
+ }
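The config wires vinai/phobert-base into a 5-way RobertaForSequenceClassification head with "problem_type": "multi_label_classification", so each Vietnamese text-type label (Nghị luận / argumentative, Thuyết minh / expository, Biểu cảm / expressive, Tự sự / narrative, Miêu tả / descriptive) is scored independently with a sigmoid rather than a softmax. A minimal inference sketch under that assumption; the local directory is a placeholder, and the example sentence assumes pre-word-segmented input, which PhoBERT tokenizers generally expect:

# Sketch only: load the uploaded config + weights and score one sentence.
# "./PhoBertPunc" is a placeholder for a local clone of this repository.
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_dir = "./PhoBertPunc"
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForSequenceClassification.from_pretrained(model_dir)
model.eval()

text = "Hôm_nay trời rất đẹp ."  # assumption: word-segmented Vietnamese input
inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=256)
with torch.no_grad():
    logits = model(**inputs).logits[0]

# problem_type is multi_label_classification, so use per-label sigmoids.
probs = torch.sigmoid(logits)
for idx, p in enumerate(probs.tolist()):
    print(model.config.id2label[idx], round(p, 3))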
optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8f32dcb90aa01001643c43c393d345b2c9a8fae109c15a27f50542b575ed686b
+ size 1080134789
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dbf931a4d45311c93aa1dfa73e1d473d8997b0d535a5dde0a83d88e01596c1bc
+ size 540079797
rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1b0d6bc36c56d42f244ea956d510009d6ca9533a9b293499c592fcc7a2630534
+ size 14575
scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e28c4bfd97129322dd36a628a7bc7ddbc53d32042596ec941093555d8f0789b9
+ size 627
special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "bos_token": "<s>",
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "mask_token": "<mask>",
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "unk_token": "<unk>"
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,13 @@
+ {
+   "bos_token": "<s>",
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "mask_token": "<mask>",
+   "model_max_length": 256,
+   "name_or_path": "vinai/phobert-base",
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "special_tokens_map_file": null,
+   "tokenizer_class": "PhobertTokenizer",
+   "unk_token": "<unk>"
+ }
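Here model_max_length is 256, two below the config's max_position_embeddings of 258 (RoBERTa-style models offset position ids, leaving 256 usable positions), and tokenizer_class is PhobertTokenizer, which reads the vocab.txt and bpe.codes files also added in this commit. A small sketch showing how over-length inputs are truncated to that limit; the path is again a placeholder:

# Sketch only; "./PhoBertPunc" is a placeholder for a local clone of this repo.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./PhoBertPunc")
print(type(tokenizer).__name__, tokenizer.model_max_length)  # PhobertTokenizer 256

long_text = " ".join(["từ"] * 1000)  # deliberately longer than the limit
ids = tokenizer(long_text, truncation=True)["input_ids"]
print(len(ids))  # capped at 256, including <s> and </s>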
trainer_state.json ADDED
@@ -0,0 +1,237 @@
+ {
+   "best_metric": 0.8468692367302207,
+   "best_model_checkpoint": "PhoBertPunc\\checkpoint-5746",
+   "epoch": 13.0,
+   "global_step": 5746,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 1.0,
+       "learning_rate": 1.9002262443438914e-05,
+       "loss": 0.3702,
+       "step": 441
+     },
+     {
+       "epoch": 1.0,
+       "eval_accuracy": 0.6371049949031601,
+       "eval_f1": 0.7381062355658198,
+       "eval_loss": 0.285582959651947,
+       "eval_roc_auc": 0.8207916729455642,
+       "eval_runtime": 9.0929,
+       "eval_samples_per_second": 107.886,
+       "eval_steps_per_second": 5.499,
+       "step": 442
+     },
+     {
+       "epoch": 2.0,
+       "learning_rate": 1.800452488687783e-05,
+       "loss": 0.2734,
+       "step": 882
+     },
+     {
+       "epoch": 2.0,
+       "eval_accuracy": 0.6605504587155964,
+       "eval_f1": 0.7523277467411547,
+       "eval_loss": 0.26946958899497986,
+       "eval_roc_auc": 0.828197000778661,
+       "eval_runtime": 7.7509,
+       "eval_samples_per_second": 126.566,
+       "eval_steps_per_second": 6.451,
+       "step": 884
+     },
+     {
+       "epoch": 2.99,
+       "learning_rate": 1.7006787330316743e-05,
+       "loss": 0.2267,
+       "step": 1323
+     },
+     {
+       "epoch": 3.0,
+       "eval_accuracy": 0.6595310907237513,
+       "eval_f1": 0.7564625850340136,
+       "eval_loss": 0.27753132581710815,
+       "eval_roc_auc": 0.8355058707497308,
+       "eval_runtime": 7.6581,
+       "eval_samples_per_second": 128.1,
+       "eval_steps_per_second": 6.529,
+       "step": 1326
+     },
+     {
+       "epoch": 3.99,
+       "learning_rate": 1.600904977375566e-05,
+       "loss": 0.1876,
+       "step": 1764
+     },
+     {
+       "epoch": 4.0,
+       "eval_accuracy": 0.6371049949031601,
+       "eval_f1": 0.763459841129744,
+       "eval_loss": 0.29114222526550293,
+       "eval_roc_auc": 0.8451443088928317,
+       "eval_runtime": 7.891,
+       "eval_samples_per_second": 124.318,
+       "eval_steps_per_second": 6.336,
+       "step": 1768
+     },
+     {
+       "epoch": 4.99,
+       "learning_rate": 1.5011312217194571e-05,
+       "loss": 0.1572,
+       "step": 2205
+     },
+     {
+       "epoch": 5.0,
+       "eval_accuracy": 0.6371049949031601,
+       "eval_f1": 0.7529827662395051,
+       "eval_loss": 0.31292596459388733,
+       "eval_roc_auc": 0.8381052176821842,
+       "eval_runtime": 7.5397,
+       "eval_samples_per_second": 130.111,
+       "eval_steps_per_second": 6.632,
+       "step": 2210
+     },
+     {
+       "epoch": 5.99,
+       "learning_rate": 1.4013574660633484e-05,
+       "loss": 0.1312,
+       "step": 2646
+     },
+     {
+       "epoch": 6.0,
+       "eval_accuracy": 0.6167176350662589,
+       "eval_f1": 0.7478632478632479,
+       "eval_loss": 0.3474615812301636,
+       "eval_roc_auc": 0.8410431585366024,
+       "eval_runtime": 8.8902,
+       "eval_samples_per_second": 110.346,
+       "eval_steps_per_second": 5.624,
+       "step": 2652
+     },
+     {
+       "epoch": 6.98,
+       "learning_rate": 1.30158371040724e-05,
+       "loss": 0.1048,
+       "step": 3087
+     },
+     {
+       "epoch": 7.0,
+       "eval_accuracy": 0.6187563710499491,
+       "eval_f1": 0.7460869565217391,
+       "eval_loss": 0.3677310347557068,
+       "eval_roc_auc": 0.8366267414318126,
+       "eval_runtime": 7.6489,
+       "eval_samples_per_second": 128.254,
+       "eval_steps_per_second": 6.537,
+       "step": 3094
+     },
+     {
+       "epoch": 7.98,
+       "learning_rate": 1.2018099547511313e-05,
+       "loss": 0.0844,
+       "step": 3528
+     },
+     {
+       "epoch": 8.0,
+       "eval_accuracy": 0.6034658511722731,
+       "eval_f1": 0.7418244406196213,
+       "eval_loss": 0.39172324538230896,
+       "eval_roc_auc": 0.835729578342679,
+       "eval_runtime": 7.4868,
+       "eval_samples_per_second": 131.031,
+       "eval_steps_per_second": 6.678,
+       "step": 3536
+     },
+     {
+       "epoch": 8.98,
+       "learning_rate": 1.1020361990950229e-05,
+       "loss": 0.0698,
+       "step": 3969
+     },
+     {
+       "epoch": 9.0,
+       "eval_accuracy": 0.6106014271151886,
+       "eval_f1": 0.7507481829841812,
+       "eval_loss": 0.42587536573410034,
+       "eval_roc_auc": 0.842892186936503,
+       "eval_runtime": 7.5353,
+       "eval_samples_per_second": 130.188,
+       "eval_steps_per_second": 6.635,
+       "step": 3978
+     },
+     {
+       "epoch": 9.98,
+       "learning_rate": 1.0022624434389141e-05,
+       "loss": 0.054,
+       "step": 4410
+     },
+     {
+       "epoch": 10.0,
+       "eval_accuracy": 0.5953109072375128,
+       "eval_f1": 0.7450317124735729,
+       "eval_loss": 0.4620385468006134,
+       "eval_roc_auc": 0.8411574616862839,
+       "eval_runtime": 7.8298,
+       "eval_samples_per_second": 125.29,
+       "eval_steps_per_second": 6.386,
+       "step": 4420
+     },
+     {
+       "epoch": 10.98,
+       "learning_rate": 9.024886877828056e-06,
+       "loss": 0.0451,
+       "step": 4851
+     },
+     {
+       "epoch": 11.0,
+       "eval_accuracy": 0.6330275229357798,
+       "eval_f1": 0.7534965034965035,
+       "eval_loss": 0.45941799879074097,
+       "eval_roc_auc": 0.8405079165428382,
+       "eval_runtime": 7.6958,
+       "eval_samples_per_second": 127.473,
+       "eval_steps_per_second": 6.497,
+       "step": 4862
+     },
+     {
+       "epoch": 11.97,
+       "learning_rate": 8.02714932126697e-06,
+       "loss": 0.0366,
+       "step": 5292
+     },
+     {
+       "epoch": 12.0,
+       "eval_accuracy": 0.5891946992864424,
+       "eval_f1": 0.7466777408637875,
+       "eval_loss": 0.5142910480499268,
+       "eval_roc_auc": 0.8457477828688038,
+       "eval_runtime": 7.657,
+       "eval_samples_per_second": 128.118,
+       "eval_steps_per_second": 6.53,
+       "step": 5304
+     },
+     {
+       "epoch": 12.97,
+       "learning_rate": 7.029411764705882e-06,
+       "loss": 0.0288,
+       "step": 5733
+     },
+     {
+       "epoch": 13.0,
+       "eval_accuracy": 0.601427115188583,
+       "eval_f1": 0.751892346509672,
+       "eval_loss": 0.5171791315078735,
+       "eval_roc_auc": 0.8468692367302207,
+       "eval_runtime": 7.4917,
+       "eval_samples_per_second": 130.944,
+       "eval_steps_per_second": 6.674,
+       "step": 5746
+     }
+   ],
+   "max_steps": 8840,
+   "num_train_epochs": 20,
+   "total_flos": 1.5096534982943232e+16,
+   "trial_name": null,
+   "trial_params": null
+ }
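trainer_state.json records training stopping at epoch 13 of a planned 20 (step 5746 of 8840); best_metric (~0.847) matches the eval_roc_auc logged at that final evaluation, which is why best_model_checkpoint points at checkpoint-5746. A short sketch for pulling that summary and the per-epoch evaluation history back out of the file; the path is a placeholder for a local clone:

# Sketch only: summarize the uploaded trainer_state.json.
# "PhoBertPunc/trainer_state.json" is a placeholder local path.
import json

with open("PhoBertPunc/trainer_state.json", encoding="utf-8") as f:
    state = json.load(f)

print("best checkpoint:", state["best_model_checkpoint"])
print("best metric    :", state["best_metric"])

# Keep only the evaluation records (they carry eval_* keys, not training loss).
for rec in state["log_history"]:
    if "eval_roc_auc" in rec:
        print(f'epoch {rec["epoch"]:>5}: '
              f'f1={rec["eval_f1"]:.3f} roc_auc={rec["eval_roc_auc"]:.3f}')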
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3b683331f34c2334ea6a233da082fed9b162450b5f05707f858f89919f8c4c79
+ size 3387
vocab.txt ADDED
The diff for this file is too large to render. See raw diff