neavo committed on
Commit
4c780c1
·
verified ·
1 Parent(s): 3b620e8
config.json CHANGED
@@ -1,14 +1,24 @@
1
  {
2
- "_name_or_path": "assets/facebookai_xlm_roberta_base_pt_20250118",
3
  "architectures": [
4
- "XLMRobertaForTokenClassification"
5
  ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "bos_token_id": 0,
8
- "classifier_dropout": null,
9
- "eos_token_id": 2,
10
- "hidden_act": "gelu",
11
- "hidden_dropout_prob": 0.1,
 
 
 
 
 
 
 
 
 
 
12
  "hidden_size": 768,
13
  "id2label": {
14
  "0": "O",
@@ -21,8 +31,9 @@
21
  "7": "B-PRD",
22
  "8": "I-PRD"
23
  },
 
24
  "initializer_range": 0.02,
25
- "intermediate_size": 3072,
26
  "label2id": {
27
  "B-LOC": 1,
28
  "B-ORG": 3,
@@ -35,17 +46,24 @@
35
  "O": 0
36
  },
37
  "layer_norm_eps": 1e-05,
38
- "max_position_embeddings": 514,
39
- "model_type": "xlm-roberta",
 
 
 
 
 
 
40
  "num_attention_heads": 12,
41
- "num_hidden_layers": 12,
42
- "output_past": true,
43
- "pad_token_id": 1,
44
  "position_embedding_type": "absolute",
45
- "reference_compile": null,
 
 
 
 
46
  "torch_dtype": "float32",
47
- "transformers_version": "4.48.0",
48
- "type_vocab_size": 1,
49
- "use_cache": true,
50
- "vocab_size": 250002
51
  }
 
1
  {
2
+ "_name_or_path": "assets/keyword_gacha_multilingual/20250128/latest",
3
  "architectures": [
4
+ "ModernBertForTokenClassification"
5
  ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 151644,
9
+ "classifier_activation": "gelu",
10
+ "classifier_bias": false,
11
+ "classifier_dropout": 0.0,
12
+ "classifier_pooling": "mean",
13
+ "cls_token_id": 151644,
14
+ "decoder_bias": true,
15
+ "deterministic_flash_attn": false,
16
+ "embedding_dropout": 0.0,
17
+ "eos_token_id": 151645,
18
+ "global_attn_every_n_layers": 3,
19
+ "global_rope_theta": 160000.0,
20
+ "gradient_checkpointing": false,
21
+ "hidden_activation": "gelu",
22
  "hidden_size": 768,
23
  "id2label": {
24
  "0": "O",
 
31
  "7": "B-PRD",
32
  "8": "I-PRD"
33
  },
34
+ "initializer_cutoff_factor": 2.0,
35
  "initializer_range": 0.02,
36
+ "intermediate_size": 1152,
37
  "label2id": {
38
  "B-LOC": 1,
39
  "B-ORG": 3,
 
46
  "O": 0
47
  },
48
  "layer_norm_eps": 1e-05,
49
+ "local_attention": 128,
50
+ "local_rope_theta": 10000.0,
51
+ "max_position_embeddings": 8192,
52
+ "mlp_bias": false,
53
+ "mlp_dropout": 0.0,
54
+ "model_type": "modernbert",
55
+ "norm_bias": false,
56
+ "norm_eps": 1e-05,
57
  "num_attention_heads": 12,
58
+ "num_hidden_layers": 22,
59
+ "pad_token_id": 151646,
 
60
  "position_embedding_type": "absolute",
61
+ "reference_compile": true,
62
+ "repad_logits_with_grad": false,
63
+ "sep_token_id": 151645,
64
+ "sparse_pred_ignore_index": -100,
65
+ "sparse_prediction": false,
66
  "torch_dtype": "float32",
67
+ "transformers_version": "4.48.1",
68
+ "vocab_size": 151680
 
 
69
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af68c037a6820c1fd1a1a8c5813aab9cafda800dff7471813c7fa853a7e79c81
3
- size 1109863956
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e9174d316c6a5bab0e17bf243e3f28cf405e6bd50921b98851067a61d589b53
3
+ size 909691780
special_tokens_map.json CHANGED
@@ -1,48 +1,34 @@
1
  {
2
- "bos_token": {
3
- "content": "<s>",
4
- "lstrip": false,
5
- "normalized": false,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
  "cls_token": {
10
- "content": "<s>",
11
- "lstrip": false,
12
- "normalized": false,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "eos_token": {
17
- "content": "</s>",
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
21
  "single_word": false
22
  },
23
  "mask_token": {
24
- "content": "<mask>",
25
- "lstrip": true,
26
  "normalized": false,
27
  "rstrip": false,
28
  "single_word": false
29
  },
30
  "pad_token": {
31
- "content": "<pad>",
32
  "lstrip": false,
33
  "normalized": false,
34
  "rstrip": false,
35
  "single_word": false
36
  },
37
  "sep_token": {
38
- "content": "</s>",
39
  "lstrip": false,
40
  "normalized": false,
41
  "rstrip": false,
42
  "single_word": false
43
  },
44
  "unk_token": {
45
- "content": "<unk>",
46
  "lstrip": false,
47
  "normalized": false,
48
  "rstrip": false,
 
1
  {
 
 
 
 
 
 
 
2
  "cls_token": {
3
+ "content": "[CLS]",
 
 
 
 
 
 
 
4
  "lstrip": false,
5
  "normalized": false,
6
  "rstrip": false,
7
  "single_word": false
8
  },
9
  "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
  "pad_token": {
17
+ "content": "[PAD]",
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
21
  "single_word": false
22
  },
23
  "sep_token": {
24
+ "content": "[SEP]",
25
  "lstrip": false,
26
  "normalized": false,
27
  "rstrip": false,
28
  "single_word": false
29
  },
30
  "unk_token": {
31
+ "content": "[UNK]",
32
  "lstrip": false,
33
  "normalized": false,
34
  "rstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20
3
- size 17082734
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ad6771f60dfa1770ddaad1fe84a7134b6258294bd5357e2998defb03f5233b4
3
+ size 11426146
tokenizer_config.json CHANGED
@@ -1,56 +1,314 @@
1
  {
2
  "added_tokens_decoder": {
3
- "0": {
4
- "content": "<s>",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
8
  "single_word": false,
9
  "special": true
10
  },
11
- "1": {
12
- "content": "<pad>",
13
  "lstrip": false,
14
  "normalized": false,
15
  "rstrip": false,
16
  "single_word": false,
17
  "special": true
18
  },
19
- "2": {
20
- "content": "</s>",
21
  "lstrip": false,
22
  "normalized": false,
23
  "rstrip": false,
24
  "single_word": false,
25
  "special": true
26
  },
27
- "3": {
28
- "content": "<unk>",
29
  "lstrip": false,
30
  "normalized": false,
31
  "rstrip": false,
32
  "single_word": false,
33
  "special": true
34
  },
35
- "250001": {
36
- "content": "<mask>",
37
- "lstrip": true,
38
  "normalized": false,
39
  "rstrip": false,
40
  "single_word": false,
41
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  }
43
  },
44
- "bos_token": "<s>",
45
  "clean_up_tokenization_spaces": false,
46
- "cls_token": "<s>",
47
  "do_lower_case": false,
48
- "eos_token": "</s>",
49
  "extra_special_tokens": {},
50
- "mask_token": "<mask>",
51
- "model_max_length": 512,
52
- "pad_token": "<pad>",
53
- "sep_token": "</s>",
54
- "tokenizer_class": "XLMRobertaTokenizer",
55
- "unk_token": "<unk>"
 
 
 
 
56
  }
 
1
  {
2
  "added_tokens_decoder": {
3
+ "151643": {
4
+ "content": "[UNK]",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
8
  "single_word": false,
9
  "special": true
10
  },
11
+ "151644": {
12
+ "content": "[CLS]",
13
  "lstrip": false,
14
  "normalized": false,
15
  "rstrip": false,
16
  "single_word": false,
17
  "special": true
18
  },
19
+ "151645": {
20
+ "content": "[SEP]",
21
  "lstrip": false,
22
  "normalized": false,
23
  "rstrip": false,
24
  "single_word": false,
25
  "special": true
26
  },
27
+ "151646": {
28
+ "content": "[PAD]",
29
  "lstrip": false,
30
  "normalized": false,
31
  "rstrip": false,
32
  "single_word": false,
33
  "special": true
34
  },
35
+ "151647": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
  "normalized": false,
39
  "rstrip": false,
40
  "single_word": false,
41
  "special": true
42
+ },
43
+ "151648": {
44
+ "content": "[UNUSED_1]",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": false
50
+ },
51
+ "151649": {
52
+ "content": "[UNUSED_2]",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": false
58
+ },
59
+ "151650": {
60
+ "content": "[UNUSED_3]",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": false
66
+ },
67
+ "151651": {
68
+ "content": "[UNUSED_4]",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": false
74
+ },
75
+ "151652": {
76
+ "content": "[UNUSED_5]",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": false
82
+ },
83
+ "151653": {
84
+ "content": "[UNUSED_6]",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": false
90
+ },
91
+ "151654": {
92
+ "content": "[UNUSED_7]",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": false
98
+ },
99
+ "151655": {
100
+ "content": "[UNUSED_8]",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": false
106
+ },
107
+ "151656": {
108
+ "content": "[UNUSED_9]",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": false
114
+ },
115
+ "151657": {
116
+ "content": "[UNUSED_10]",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": false
122
+ },
123
+ "151658": {
124
+ "content": "[UNUSED_11]",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": false
130
+ },
131
+ "151659": {
132
+ "content": "[UNUSED_12]",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": false
138
+ },
139
+ "151660": {
140
+ "content": "[UNUSED_13]",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": false
146
+ },
147
+ "151661": {
148
+ "content": "[UNUSED_14]",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": false
154
+ },
155
+ "151662": {
156
+ "content": "[UNUSED_15]",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": false
162
+ },
163
+ "151663": {
164
+ "content": "[UNUSED_16]",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": false
170
+ },
171
+ "151664": {
172
+ "content": "[UNUSED_17]",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": false
178
+ },
179
+ "151665": {
180
+ "content": "[UNUSED_18]",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": false
186
+ },
187
+ "151666": {
188
+ "content": "[UNUSED_19]",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": false
194
+ },
195
+ "151667": {
196
+ "content": "[UNUSED_20]",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": false
202
+ },
203
+ "151668": {
204
+ "content": "[UNUSED_21]",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": false
210
+ },
211
+ "151669": {
212
+ "content": "[UNUSED_22]",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": false
218
+ },
219
+ "151670": {
220
+ "content": "[UNUSED_23]",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": false
226
+ },
227
+ "151671": {
228
+ "content": "[UNUSED_24]",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": false
234
+ },
235
+ "151672": {
236
+ "content": "[UNUSED_25]",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": false
242
+ },
243
+ "151673": {
244
+ "content": "[UNUSED_26]",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": false
250
+ },
251
+ "151674": {
252
+ "content": "[UNUSED_27]",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": false
258
+ },
259
+ "151675": {
260
+ "content": "[UNUSED_28]",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": false
266
+ },
267
+ "151676": {
268
+ "content": "[UNUSED_29]",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": false
274
+ },
275
+ "151677": {
276
+ "content": "[UNUSED_30]",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": false
282
+ },
283
+ "151678": {
284
+ "content": "[UNUSED_31]",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": false
290
+ },
291
+ "151679": {
292
+ "content": "[UNUSED_32]",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": false
298
  }
299
  },
 
300
  "clean_up_tokenization_spaces": false,
301
+ "cls_token": "[CLS]",
302
  "do_lower_case": false,
 
303
  "extra_special_tokens": {},
304
+ "mask_token": "[MASK]",
305
+ "model_input_names": [
306
+ "input_ids",
307
+ "attention_mask"
308
+ ],
309
+ "model_max_length": 8192,
310
+ "pad_token": "[PAD]",
311
+ "sep_token": "[SEP]",
312
+ "tokenizer_class": "PreTrainedTokenizerFast",
313
+ "unk_token": "[UNK]"
314
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:589556952a5a1690aef5fb3da305f9e1a5c2f80fa35fb293da649920d81b9dc9
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f42ffad9e204a709502a4ff4e94a33f6cceee08cd06587f23c454d3937dc5ed9
3
  size 5368
training_args.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "output_dir": "output",
3
  "overwrite_output_dir": false,
4
  "do_train": false,
5
  "do_eval": true,
@@ -14,25 +14,22 @@
14
  "eval_accumulation_steps": null,
15
  "eval_delay": 0,
16
  "torch_empty_cache_steps": null,
17
- "learning_rate": 8e-06,
18
- "weight_decay": 0.01,
19
  "adam_beta1": 0.9,
20
  "adam_beta2": 0.999,
21
  "adam_epsilon": 1e-08,
22
  "max_grad_norm": 1.0,
23
  "num_train_epochs": 3.0,
24
- "max_steps": 12500,
25
- "lr_scheduler_type": "warmup_stable_decay",
26
- "lr_scheduler_kwargs": {
27
- "num_decay_steps": 1251,
28
- "num_stable_steps": 10001
29
- },
30
- "warmup_ratio": 0.1,
31
- "warmup_steps": 0,
32
  "log_level": "passive",
33
  "log_level_replica": "warning",
34
  "log_on_each_node": true,
35
- "logging_dir": "output/runs/Jan19_16-42-35_Neavo-PC",
36
  "logging_strategy": "steps",
37
  "logging_first_step": false,
38
  "logging_steps": 5,
@@ -64,11 +61,11 @@
64
  "tpu_metrics_debug": false,
65
  "debug": [],
66
  "dataloader_drop_last": false,
67
- "eval_steps": 300,
68
- "dataloader_num_workers": 0,
69
  "dataloader_prefetch_factor": null,
70
  "past_index": -1,
71
- "run_name": "output",
72
  "disable_tqdm": false,
73
  "remove_unused_columns": true,
74
  "label_names": null,
@@ -133,8 +130,8 @@
133
  "torchdynamo": null,
134
  "ray_scope": "last",
135
  "ddp_timeout": 1800,
136
- "torch_compile": true,
137
- "torch_compile_backend": "inductor",
138
  "torch_compile_mode": null,
139
  "dispatch_batches": null,
140
  "split_batches": null,
 
1
  {
2
+ "output_dir": "output/keyword_gacha_multilingual_ner/20250131_6e5_cosine",
3
  "overwrite_output_dir": false,
4
  "do_train": false,
5
  "do_eval": true,
 
14
  "eval_accumulation_steps": null,
15
  "eval_delay": 0,
16
  "torch_empty_cache_steps": null,
17
+ "learning_rate": 6.000000000000001e-05,
18
+ "weight_decay": 1e-05,
19
  "adam_beta1": 0.9,
20
  "adam_beta2": 0.999,
21
  "adam_epsilon": 1e-08,
22
  "max_grad_norm": 1.0,
23
  "num_train_epochs": 3.0,
24
+ "max_steps": 7500,
25
+ "lr_scheduler_type": "cosine",
26
+ "lr_scheduler_kwargs": {},
27
+ "warmup_ratio": 0.0,
28
+ "warmup_steps": 750,
 
 
 
29
  "log_level": "passive",
30
  "log_level_replica": "warning",
31
  "log_on_each_node": true,
32
+ "logging_dir": "output/keyword_gacha_multilingual_ner/20250131_6e5_cosine/runs/Jan31_13-51-32_Neavo-PC",
33
  "logging_strategy": "steps",
34
  "logging_first_step": false,
35
  "logging_steps": 5,
 
61
  "tpu_metrics_debug": false,
62
  "debug": [],
63
  "dataloader_drop_last": false,
64
+ "eval_steps": 200,
65
+ "dataloader_num_workers": 8,
66
  "dataloader_prefetch_factor": null,
67
  "past_index": -1,
68
+ "run_name": "output/keyword_gacha_multilingual_ner/20250131_6e5_cosine",
69
  "disable_tqdm": false,
70
  "remove_unused_columns": true,
71
  "label_names": null,
 
130
  "torchdynamo": null,
131
  "ray_scope": "last",
132
  "ddp_timeout": 1800,
133
+ "torch_compile": false,
134
+ "torch_compile_backend": null,
135
  "torch_compile_mode": null,
136
  "dispatch_batches": null,
137
  "split_batches": null,