{
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "adapters": {
    "adapters": {
      "mam_adapter": "6e29731ca7c632cd"
    },
    "config_map": {
      "6e29731ca7c632cd": {
        "architecture": "union",
        "configs": [
          {
            "architecture": "prefix_tuning",
            "bottleneck_size": 800,
            "cross_prefix": true,
            "dropout": 0.0,
            "encoder_prefix": true,
            "flat": false,
            "leave_out": [],
            "non_linearity": "tanh",
            "prefix_length": 30
          },
          {
            "adapter_residual_before_ln": false,
            "cross_adapter": false,
            "factorized_phm_W": true,
            "factorized_phm_rule": false,
            "hypercomplex_nonlinearity": "glorot-uniform",
            "init_weights": "mam_adapter",
            "inv_adapter": null,
            "inv_adapter_reduction_factor": null,
            "is_parallel": true,
            "learn_phm": true,
            "leave_out": [],
            "ln_after": false,
            "ln_before": false,
            "mh_adapter": false,
            "non_linearity": "relu",
            "original_ln_after": true,
            "original_ln_before": false,
            "output_adapter": true,
            "phm_bias": true,
            "phm_c_init": "normal",
            "phm_dim": 4,
            "phm_init_range": 0.0001,
            "phm_layer": false,
            "phm_rank": 1,
            "reduction_factor": 2,
            "residual_before_ln": true,
            "scaling": 4.0,
            "shared_W_phm": false,
            "shared_phm_rule": true
          }
        ]
      }
    },
    "fusion_config_map": {},
    "fusions": {}
  },
  "architectures": [
    "DistilBertForQuestionAnswering"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "torch_dtype": "float32",
  "transformers_version": "4.17.0",
  "vocab_size": 30522
}