{ "config": { "adapter_residual_before_ln": false, "cross_adapter": false, "factorized_phm_W": true, "factorized_phm_rule": false, "hypercomplex_nonlinearity": "glorot-uniform", "init_weights": "bert", "inv_adapter": null, "inv_adapter_reduction_factor": null, "is_parallel": false, "learn_phm": true, "leave_out": [], "ln_after": false, "ln_before": false, "mh_adapter": false, "non_linearity": "relu", "original_ln_after": true, "original_ln_before": true, "output_adapter": true, "phm_bias": true, "phm_c_init": "normal", "phm_dim": 4, "phm_init_range": 0.0001, "phm_layer": false, "phm_rank": 1, "reduction_factor": 16, "residual_before_ln": true, "scaling": 1.0, "shared_W_phm": false, "shared_phm_rule": true, "use_gating": false }, "config_id": "9076f36a74755ac4", "hidden_size": 1024, "model_class": "BertForSequenceClassification", "model_name": "bert-large-cased", "model_type": "bert", "name": "snli", "version": "0.1.1" }