Update config.json
Browse files- config.json +1 -27
config.json
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
{
|
2 |
-
"_num_labels": 2,
|
3 |
"accumulate_gradients": 4,
|
4 |
"ae_steps": [],
|
5 |
"amp": 2,
|
@@ -24,10 +23,8 @@
|
|
24 |
"debug": false,
|
25 |
"debug_slurm": false,
|
26 |
"debug_train": false,
|
27 |
-
"do_sample": false,
|
28 |
"dropout": 0.1,
|
29 |
"dump_path": "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656234",
|
30 |
-
"early_stopping": false,
|
31 |
"emb_dim": 1280,
|
32 |
"embed_init_std": 0.02209708691207961,
|
33 |
"encoder_only": true,
|
@@ -38,16 +35,11 @@
|
|
38 |
"eval_only": false,
|
39 |
"exp_id": "16656234",
|
40 |
"exp_name": "xlm_17_100_big.3",
|
41 |
-
"finetuning_task": null,
|
42 |
"fp16": true,
|
43 |
"gelu_activation": true,
|
44 |
"global_rank": 0,
|
45 |
"group_by_size": true,
|
46 |
"hyp_path": "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656234/hypotheses",
|
47 |
-
"id2label": {
|
48 |
-
"0": "LABEL_0",
|
49 |
-
"1": "LABEL_1"
|
50 |
-
},
|
51 |
"id2lang": {
|
52 |
"0": "af",
|
53 |
"1": "als",
|
@@ -151,14 +143,9 @@
|
|
151 |
"99": "zh_yue"
|
152 |
},
|
153 |
"init_std": 0.02,
|
154 |
-
"is_decoder": false,
|
155 |
"is_encoder": true,
|
156 |
"is_master": true,
|
157 |
"is_slurm_job": true,
|
158 |
-
"label2id": {
|
159 |
-
"LABEL_0": 0,
|
160 |
-
"LABEL_1": 1
|
161 |
-
},
|
162 |
"lambda_ae": 1.0,
|
163 |
"lambda_ae_config": null,
|
164 |
"lambda_bt": 1.0,
|
@@ -377,7 +364,6 @@
|
|
377 |
"am"
|
378 |
],
|
379 |
"layer_norm_eps": 1e-12,
|
380 |
-
"length_penalty": 1,
|
381 |
"lg_sampling_factor": 0.7,
|
382 |
"lgs": "en-es-fr-de-zh-ru-pt-it-ar-ja-id-tr-nl-pl-simple-fa-vi-sv-ko-he-ro-no-hi-uk-cs-fi-hu-th-da-ca-el-bg-sr-ms-bn-hr-sl-zh_yue-az-sk-eo-ta-sh-lt-et-ml-la-bs-sq-arz-af-ka-mr-eu-tl-ang-gl-nn-ur-kk-be-hy-te-lv-mk-zh_classical-als-is-wuu-my-sco-mn-ceb-ast-cy-kn-br-an-gu-bar-uz-lb-ne-si-war-jv-ga-zh_min_nan-oc-ku-sw-nds-ckb-ia-yi-fy-scn-gan-tt-am",
|
383 |
"local_rank": 0,
|
@@ -388,7 +374,6 @@
|
|
388 |
"max_batch_size": 0,
|
389 |
"max_epoch": 100000,
|
390 |
"max_len": 200,
|
391 |
-
"max_length": 20,
|
392 |
"max_position_embeddings": 512,
|
393 |
"max_vocab": 200000,
|
394 |
"min_count": 0,
|
@@ -1408,23 +1393,17 @@
|
|
1408 |
"n_layers": 16,
|
1409 |
"n_nodes": 4,
|
1410 |
"node_id": 0,
|
1411 |
-
"num_beams": 1,
|
1412 |
-
"num_return_sequences": 1,
|
1413 |
"optimizer": "adam_inverse_sqrt,lr=0.00005,warmup_updates=30000,beta1=0.9,beta2=0.999,weight_decay=0.01,eps=0.000001",
|
1414 |
-
"output_attentions": false,
|
1415 |
-
"output_hidden_states": false,
|
1416 |
"output_past": true,
|
1417 |
"pad_index": 2,
|
1418 |
"pad_token_id": 2,
|
1419 |
"para_dataset": {},
|
1420 |
"para_list": [],
|
1421 |
"pc_steps": [],
|
1422 |
-
"pruned_heads": {},
|
1423 |
"ref_paths": {},
|
1424 |
"reload_checkpoint": "",
|
1425 |
"reload_emb": "",
|
1426 |
"reload_model": "/checkpoint/aconneau/dumped/xlm_17_100_240_big_model_upper.2/14884511/best-valid_zh_mlm_ppl.pth",
|
1427 |
-
"repetition_penalty": 1.0,
|
1428 |
"sample_alpha": 0.5,
|
1429 |
"save_periodic": 0,
|
1430 |
"share_inout_emb": true,
|
@@ -1437,13 +1416,8 @@
|
|
1437 |
"summary_proj_to_labels": true,
|
1438 |
"summary_type": "first",
|
1439 |
"summary_use_proj": true,
|
1440 |
-
"temperature": 1.0,
|
1441 |
"tokens_per_batch": -1,
|
1442 |
-
"top_k": 50,
|
1443 |
-
"top_p": 1.0,
|
1444 |
-
"torchscript": false,
|
1445 |
"unk_index": 3,
|
1446 |
-
"use_bfloat16": false,
|
1447 |
"use_lang_emb": false,
|
1448 |
"use_memory": false,
|
1449 |
"validation_metrics": "_valid_en_mlm_ppl,_valid_mlm_ppl,_valid_zh_mlm_ppl",
|
@@ -1457,4 +1431,4 @@
|
|
1457 |
"word_rand": 0.1,
|
1458 |
"word_shuffle": 0.0,
|
1459 |
"world_size": 32
|
1460 |
-
}
|
|
|
1 |
{
|
|
|
2 |
"accumulate_gradients": 4,
|
3 |
"ae_steps": [],
|
4 |
"amp": 2,
|
|
|
23 |
"debug": false,
|
24 |
"debug_slurm": false,
|
25 |
"debug_train": false,
|
|
|
26 |
"dropout": 0.1,
|
27 |
"dump_path": "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656234",
|
|
|
28 |
"emb_dim": 1280,
|
29 |
"embed_init_std": 0.02209708691207961,
|
30 |
"encoder_only": true,
|
|
|
35 |
"eval_only": false,
|
36 |
"exp_id": "16656234",
|
37 |
"exp_name": "xlm_17_100_big.3",
|
|
|
38 |
"fp16": true,
|
39 |
"gelu_activation": true,
|
40 |
"global_rank": 0,
|
41 |
"group_by_size": true,
|
42 |
"hyp_path": "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656234/hypotheses",
|
|
|
|
|
|
|
|
|
43 |
"id2lang": {
|
44 |
"0": "af",
|
45 |
"1": "als",
|
|
|
143 |
"99": "zh_yue"
|
144 |
},
|
145 |
"init_std": 0.02,
|
|
|
146 |
"is_encoder": true,
|
147 |
"is_master": true,
|
148 |
"is_slurm_job": true,
|
|
|
|
|
|
|
|
|
149 |
"lambda_ae": 1.0,
|
150 |
"lambda_ae_config": null,
|
151 |
"lambda_bt": 1.0,
|
|
|
364 |
"am"
|
365 |
],
|
366 |
"layer_norm_eps": 1e-12,
|
|
|
367 |
"lg_sampling_factor": 0.7,
|
368 |
"lgs": "en-es-fr-de-zh-ru-pt-it-ar-ja-id-tr-nl-pl-simple-fa-vi-sv-ko-he-ro-no-hi-uk-cs-fi-hu-th-da-ca-el-bg-sr-ms-bn-hr-sl-zh_yue-az-sk-eo-ta-sh-lt-et-ml-la-bs-sq-arz-af-ka-mr-eu-tl-ang-gl-nn-ur-kk-be-hy-te-lv-mk-zh_classical-als-is-wuu-my-sco-mn-ceb-ast-cy-kn-br-an-gu-bar-uz-lb-ne-si-war-jv-ga-zh_min_nan-oc-ku-sw-nds-ckb-ia-yi-fy-scn-gan-tt-am",
|
369 |
"local_rank": 0,
|
|
|
374 |
"max_batch_size": 0,
|
375 |
"max_epoch": 100000,
|
376 |
"max_len": 200,
|
|
|
377 |
"max_position_embeddings": 512,
|
378 |
"max_vocab": 200000,
|
379 |
"min_count": 0,
|
|
|
1393 |
"n_layers": 16,
|
1394 |
"n_nodes": 4,
|
1395 |
"node_id": 0,
|
|
|
|
|
1396 |
"optimizer": "adam_inverse_sqrt,lr=0.00005,warmup_updates=30000,beta1=0.9,beta2=0.999,weight_decay=0.01,eps=0.000001",
|
|
|
|
|
1397 |
"output_past": true,
|
1398 |
"pad_index": 2,
|
1399 |
"pad_token_id": 2,
|
1400 |
"para_dataset": {},
|
1401 |
"para_list": [],
|
1402 |
"pc_steps": [],
|
|
|
1403 |
"ref_paths": {},
|
1404 |
"reload_checkpoint": "",
|
1405 |
"reload_emb": "",
|
1406 |
"reload_model": "/checkpoint/aconneau/dumped/xlm_17_100_240_big_model_upper.2/14884511/best-valid_zh_mlm_ppl.pth",
|
|
|
1407 |
"sample_alpha": 0.5,
|
1408 |
"save_periodic": 0,
|
1409 |
"share_inout_emb": true,
|
|
|
1416 |
"summary_proj_to_labels": true,
|
1417 |
"summary_type": "first",
|
1418 |
"summary_use_proj": true,
|
|
|
1419 |
"tokens_per_batch": -1,
|
|
|
|
|
|
|
1420 |
"unk_index": 3,
|
|
|
1421 |
"use_lang_emb": false,
|
1422 |
"use_memory": false,
|
1423 |
"validation_metrics": "_valid_en_mlm_ppl,_valid_mlm_ppl,_valid_zh_mlm_ppl",
|
|
|
1431 |
"word_rand": 0.1,
|
1432 |
"word_shuffle": 0.0,
|
1433 |
"world_size": 32
|
1434 |
+
}
|