neavo committed on
Commit
0e5a816
·
verified ·
1 Parent(s): 8b7888b
config.json CHANGED
@@ -1,29 +1,46 @@
1
  {
2
- "_name_or_path": "assets/facebookai_xlm_roberta_base",
3
  "architectures": [
4
- "XLMRobertaForMaskedLM"
5
  ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "bos_token_id": 0,
8
- "classifier_dropout": null,
9
- "eos_token_id": 2,
10
- "hidden_act": "gelu",
11
- "hidden_dropout_prob": 0.1,
 
 
 
 
 
 
 
 
 
 
12
  "hidden_size": 768,
 
13
  "initializer_range": 0.02,
14
- "intermediate_size": 3072,
15
  "layer_norm_eps": 1e-05,
16
- "max_position_embeddings": 514,
17
- "model_type": "xlm-roberta",
 
 
 
 
 
 
18
  "num_attention_heads": 12,
19
- "num_hidden_layers": 12,
20
- "output_past": true,
21
- "pad_token_id": 1,
22
  "position_embedding_type": "absolute",
23
- "reference_compile": null,
 
 
 
24
  "torch_dtype": "float32",
25
- "transformers_version": "4.48.0",
26
- "type_vocab_size": 1,
27
- "use_cache": true,
28
- "vocab_size": 250002
29
  }
 
1
  {
2
+ "_name_or_path": "assets/modern_bert_multilingual/20250128/nodecay",
3
  "architectures": [
4
+ "ModernBertForMaskedLM"
5
  ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 151644,
9
+ "classifier_activation": "gelu",
10
+ "classifier_bias": false,
11
+ "classifier_dropout": 0.0,
12
+ "classifier_pooling": "mean",
13
+ "cls_token_id": 151644,
14
+ "decoder_bias": true,
15
+ "deterministic_flash_attn": false,
16
+ "embedding_dropout": 0.0,
17
+ "eos_token_id": 151645,
18
+ "global_attn_every_n_layers": 3,
19
+ "global_rope_theta": 160000.0,
20
+ "gradient_checkpointing": false,
21
+ "hidden_activation": "gelu",
22
  "hidden_size": 768,
23
+ "initializer_cutoff_factor": 2.0,
24
  "initializer_range": 0.02,
25
+ "intermediate_size": 1152,
26
  "layer_norm_eps": 1e-05,
27
+ "local_attention": 128,
28
+ "local_rope_theta": 10000.0,
29
+ "max_position_embeddings": 8192,
30
+ "mlp_bias": false,
31
+ "mlp_dropout": 0.0,
32
+ "model_type": "modernbert",
33
+ "norm_bias": false,
34
+ "norm_eps": 1e-05,
35
  "num_attention_heads": 12,
36
+ "num_hidden_layers": 22,
37
+ "pad_token_id": 151646,
 
38
  "position_embedding_type": "absolute",
39
+ "repad_logits_with_grad": false,
40
+ "sep_token_id": 151645,
41
+ "sparse_pred_ignore_index": -100,
42
+ "sparse_prediction": false,
43
  "torch_dtype": "float32",
44
+ "transformers_version": "4.48.1",
45
+ "vocab_size": 151680
 
 
46
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2e50fb6e5cb06134cfd7be0ba9a6e01675ef132bdca1718c3aecc1c32fb842a
3
- size 1113205088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eaef7911078aa9b1d61398ac0e6a1125853bacf37ebe55eb97f6d322f3c2a5d5
3
+ size 910270752
special_tokens_map.json CHANGED
@@ -1,15 +1,37 @@
1
  {
2
- "bos_token": "<s>",
3
- "cls_token": "<s>",
4
- "eos_token": "</s>",
 
 
 
 
5
  "mask_token": {
6
- "content": "<mask>",
7
- "lstrip": true,
 
 
 
 
 
 
 
8
  "normalized": false,
9
  "rstrip": false,
10
  "single_word": false
11
  },
12
- "pad_token": "<pad>",
13
- "sep_token": "</s>",
14
- "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
15
  }
 
1
  {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
  "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
21
  "single_word": false
22
  },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
  }
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20
3
- size 17082734
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ad6771f60dfa1770ddaad1fe84a7134b6258294bd5357e2998defb03f5233b4
3
+ size 11426146
tokenizer_config.json CHANGED
@@ -1,56 +1,314 @@
1
  {
2
  "added_tokens_decoder": {
3
- "0": {
4
- "content": "<s>",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
8
  "single_word": false,
9
  "special": true
10
  },
11
- "1": {
12
- "content": "<pad>",
13
  "lstrip": false,
14
  "normalized": false,
15
  "rstrip": false,
16
  "single_word": false,
17
  "special": true
18
  },
19
- "2": {
20
- "content": "</s>",
21
  "lstrip": false,
22
  "normalized": false,
23
  "rstrip": false,
24
  "single_word": false,
25
  "special": true
26
  },
27
- "3": {
28
- "content": "<unk>",
29
  "lstrip": false,
30
  "normalized": false,
31
  "rstrip": false,
32
  "single_word": false,
33
  "special": true
34
  },
35
- "250001": {
36
- "content": "<mask>",
37
- "lstrip": true,
38
  "normalized": false,
39
  "rstrip": false,
40
  "single_word": false,
41
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  }
43
  },
44
- "bos_token": "<s>",
45
  "clean_up_tokenization_spaces": false,
46
- "cls_token": "<s>",
47
  "do_lower_case": false,
48
- "eos_token": "</s>",
49
  "extra_special_tokens": {},
50
- "mask_token": "<mask>",
51
- "model_max_length": 512,
52
- "pad_token": "<pad>",
53
- "sep_token": "</s>",
54
- "tokenizer_class": "XLMRobertaTokenizer",
55
- "unk_token": "<unk>"
 
 
 
 
56
  }
 
1
  {
2
  "added_tokens_decoder": {
3
+ "151643": {
4
+ "content": "[UNK]",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
8
  "single_word": false,
9
  "special": true
10
  },
11
+ "151644": {
12
+ "content": "[CLS]",
13
  "lstrip": false,
14
  "normalized": false,
15
  "rstrip": false,
16
  "single_word": false,
17
  "special": true
18
  },
19
+ "151645": {
20
+ "content": "[SEP]",
21
  "lstrip": false,
22
  "normalized": false,
23
  "rstrip": false,
24
  "single_word": false,
25
  "special": true
26
  },
27
+ "151646": {
28
+ "content": "[PAD]",
29
  "lstrip": false,
30
  "normalized": false,
31
  "rstrip": false,
32
  "single_word": false,
33
  "special": true
34
  },
35
+ "151647": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
  "normalized": false,
39
  "rstrip": false,
40
  "single_word": false,
41
  "special": true
42
+ },
43
+ "151648": {
44
+ "content": "[UNUSED_1]",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": false
50
+ },
51
+ "151649": {
52
+ "content": "[UNUSED_2]",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": false
58
+ },
59
+ "151650": {
60
+ "content": "[UNUSED_3]",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": false
66
+ },
67
+ "151651": {
68
+ "content": "[UNUSED_4]",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": false
74
+ },
75
+ "151652": {
76
+ "content": "[UNUSED_5]",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": false
82
+ },
83
+ "151653": {
84
+ "content": "[UNUSED_6]",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": false
90
+ },
91
+ "151654": {
92
+ "content": "[UNUSED_7]",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": false
98
+ },
99
+ "151655": {
100
+ "content": "[UNUSED_8]",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": false
106
+ },
107
+ "151656": {
108
+ "content": "[UNUSED_9]",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": false
114
+ },
115
+ "151657": {
116
+ "content": "[UNUSED_10]",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": false
122
+ },
123
+ "151658": {
124
+ "content": "[UNUSED_11]",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": false
130
+ },
131
+ "151659": {
132
+ "content": "[UNUSED_12]",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": false
138
+ },
139
+ "151660": {
140
+ "content": "[UNUSED_13]",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": false
146
+ },
147
+ "151661": {
148
+ "content": "[UNUSED_14]",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": false
154
+ },
155
+ "151662": {
156
+ "content": "[UNUSED_15]",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": false
162
+ },
163
+ "151663": {
164
+ "content": "[UNUSED_16]",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": false
170
+ },
171
+ "151664": {
172
+ "content": "[UNUSED_17]",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": false
178
+ },
179
+ "151665": {
180
+ "content": "[UNUSED_18]",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": false
186
+ },
187
+ "151666": {
188
+ "content": "[UNUSED_19]",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": false
194
+ },
195
+ "151667": {
196
+ "content": "[UNUSED_20]",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": false
202
+ },
203
+ "151668": {
204
+ "content": "[UNUSED_21]",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": false
210
+ },
211
+ "151669": {
212
+ "content": "[UNUSED_22]",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": false
218
+ },
219
+ "151670": {
220
+ "content": "[UNUSED_23]",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": false
226
+ },
227
+ "151671": {
228
+ "content": "[UNUSED_24]",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": false
234
+ },
235
+ "151672": {
236
+ "content": "[UNUSED_25]",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": false
242
+ },
243
+ "151673": {
244
+ "content": "[UNUSED_26]",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": false
250
+ },
251
+ "151674": {
252
+ "content": "[UNUSED_27]",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": false
258
+ },
259
+ "151675": {
260
+ "content": "[UNUSED_28]",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": false
266
+ },
267
+ "151676": {
268
+ "content": "[UNUSED_29]",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": false
274
+ },
275
+ "151677": {
276
+ "content": "[UNUSED_30]",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": false
282
+ },
283
+ "151678": {
284
+ "content": "[UNUSED_31]",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": false
290
+ },
291
+ "151679": {
292
+ "content": "[UNUSED_32]",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": false
298
  }
299
  },
 
300
  "clean_up_tokenization_spaces": false,
301
+ "cls_token": "[CLS]",
302
  "do_lower_case": false,
 
303
  "extra_special_tokens": {},
304
+ "mask_token": "[MASK]",
305
+ "model_input_names": [
306
+ "input_ids",
307
+ "attention_mask"
308
+ ],
309
+ "model_max_length": 8192,
310
+ "pad_token": "[PAD]",
311
+ "sep_token": "[SEP]",
312
+ "tokenizer_class": "PreTrainedTokenizerFast",
313
+ "unk_token": "[UNK]"
314
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0248bcc08258853752fa496fa7a411c44ef6785f0a082234eb6edfe80d95f73
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08bef329c1128512ab4e1068ec66ff5a070db0952cdf5879410fab59eb8fbe91
3
  size 5432
training_args.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "output_dir": "output/facebookai_xlm_roberta_base_pt_e1",
3
  "overwrite_output_dir": false,
4
  "do_train": false,
5
  "do_eval": true,
@@ -10,12 +10,12 @@
10
  "per_device_eval_batch_size": 8,
11
  "per_gpu_train_batch_size": null,
12
  "per_gpu_eval_batch_size": null,
13
- "gradient_accumulation_steps": 32,
14
  "eval_accumulation_steps": null,
15
  "eval_delay": 0,
16
  "torch_empty_cache_steps": null,
17
- "learning_rate": 5e-05,
18
- "weight_decay": 0.01,
19
  "adam_beta1": 0.9,
20
  "adam_beta2": 0.999,
21
  "adam_epsilon": 1e-08,
@@ -24,21 +24,21 @@
24
  "max_steps": -1,
25
  "lr_scheduler_type": "warmup_stable_decay",
26
  "lr_scheduler_kwargs": {
27
- "num_decay_steps": 1562,
28
- "num_stable_steps": 12494
29
  },
30
- "warmup_ratio": 0.1,
31
  "warmup_steps": 0,
32
  "log_level": "passive",
33
  "log_level_replica": "warning",
34
  "log_on_each_node": true,
35
- "logging_dir": "output/facebookai_xlm_roberta_base_pt_e1/runs/Jan18_11-08-32_Neavo-PC",
36
  "logging_strategy": "steps",
37
  "logging_first_step": false,
38
- "logging_steps": 5,
39
  "logging_nan_inf_filter": true,
40
- "save_strategy": "no",
41
- "save_steps": 0,
42
  "save_total_limit": null,
43
  "save_safetensors": true,
44
  "save_on_each_node": false,
@@ -64,11 +64,11 @@
64
  "tpu_metrics_debug": false,
65
  "debug": [],
66
  "dataloader_drop_last": false,
67
- "eval_steps": 200,
68
- "dataloader_num_workers": 4,
69
- "dataloader_prefetch_factor": 8,
70
  "past_index": -1,
71
- "run_name": "output/facebookai_xlm_roberta_base_pt_e1",
72
  "disable_tqdm": false,
73
  "remove_unused_columns": true,
74
  "label_names": null,
@@ -107,7 +107,7 @@
107
  "ddp_bucket_cap_mb": null,
108
  "ddp_broadcast_buffers": null,
109
  "dataloader_pin_memory": true,
110
- "dataloader_persistent_workers": true,
111
  "skip_memory_metrics": true,
112
  "use_legacy_prediction_loop": false,
113
  "push_to_hub": false,
 
1
  {
2
+ "output_dir": "output/keyword_gacha_multilingual/20250128",
3
  "overwrite_output_dir": false,
4
  "do_train": false,
5
  "do_eval": true,
 
10
  "per_device_eval_batch_size": 8,
11
  "per_gpu_train_batch_size": null,
12
  "per_gpu_eval_batch_size": null,
13
+ "gradient_accumulation_steps": 224,
14
  "eval_accumulation_steps": null,
15
  "eval_delay": 0,
16
  "torch_empty_cache_steps": null,
17
+ "learning_rate": 0.0005,
18
+ "weight_decay": 1e-05,
19
  "adam_beta1": 0.9,
20
  "adam_beta2": 0.999,
21
  "adam_epsilon": 1e-08,
 
24
  "max_steps": -1,
25
  "lr_scheduler_type": "warmup_stable_decay",
26
  "lr_scheduler_kwargs": {
27
+ "num_decay_steps": 1114,
28
+ "num_stable_steps": 0
29
  },
30
+ "warmup_ratio": 0,
31
  "warmup_steps": 0,
32
  "log_level": "passive",
33
  "log_level_replica": "warning",
34
  "log_on_each_node": true,
35
+ "logging_dir": "output/keyword_gacha_multilingual/20250128/runs/Jan28_23-39-29_Neavo-PC",
36
  "logging_strategy": "steps",
37
  "logging_first_step": false,
38
+ "logging_steps": 1,
39
  "logging_nan_inf_filter": true,
40
+ "save_strategy": "steps",
41
+ "save_steps": 50,
42
  "save_total_limit": null,
43
  "save_safetensors": true,
44
  "save_on_each_node": false,
 
64
  "tpu_metrics_debug": false,
65
  "debug": [],
66
  "dataloader_drop_last": false,
67
+ "eval_steps": 50,
68
+ "dataloader_num_workers": 8,
69
+ "dataloader_prefetch_factor": null,
70
  "past_index": -1,
71
+ "run_name": "output/keyword_gacha_multilingual/20250128",
72
  "disable_tqdm": false,
73
  "remove_unused_columns": true,
74
  "label_names": null,
 
107
  "ddp_bucket_cap_mb": null,
108
  "ddp_broadcast_buffers": null,
109
  "dataloader_pin_memory": true,
110
+ "dataloader_persistent_workers": false,
111
  "skip_memory_metrics": true,
112
  "use_legacy_prediction_loop": false,
113
  "push_to_hub": false,