{
  "optim": {
    "type": "AdamW",
    "lr": 0.001,
    "betas": [
      0.9,
      0.98
    ],
    "eps": 1e-12,
    "weight_decay": 0.01,
    "amsgrad": false
  },
  "optim_mod": {
    "name": "none"
  },
  "name": "bert-o3",
  "limited_decay_keys": [
    "bias",
    "LayerNorm.bias",
    "LayerNorm.weight"
  ],
  "warmup_steps": 10000,
  "cooldown_steps": 0,
  "steps": 100000,
  "scheduler": "budget-triangle2",
  "batch_size": 4096,
  "batch_size_ramp": 300000,
  "gradient_clipping": 0.5,
  "pretrain_in_train_mode": false,
  "objective": {
    "name": "masked-lm",
    "mlm_probability": 0.15,
    "use_80_20_rule": true,
    "disable_mlm": false,
    "token_drop": 0.0
  },
  "reverse_dataset_order": false,
  "budget": 24,
  "gradinit": {
    "enabled": false
  }
}