{"Body": "twin_encoder", "loss_balancer_meta": true, "loss_balancer_log": false, "loss_balancer_lbtw": false, "pma_skip_small": false, "isab_skip_small": false, "layer_norm": false, "pma_layer_norm": false, "attn_residual": true, "tf_n_layers_dec": false, "tf_isab_rank": 0, "tf_lora": false, "tf_layer_norm": false, "tf_pma_start": -1, "ada_n_seeds": 0, "head_n_seeds": 0, "tf_pma_low": 1, "gradient_penalty_kwargs": {"mag_loss": true, "mse_mag": true, "mag_corr": false, "seq_mag": false, "cos_loss": false, "mse_mag_kwargs": {"target": 1.0, "multiply": true, "forgive_over": true}, "mag_corr_kwargs": {"only_sign": false}, "cos_loss_kwargs": {"only_sign": true, "cos_matrix": false}}, "dropout": 0, "combine_mode": "diff_left", "tf_isab_mode": "separate", "grad_loss_fn": "mae", "single_model": true, "bias": true, "bias_final": true, "pma_ffn_mode": "none", "patience": 10, "inds_init_mode": "torch", "grad_clip": 0.75, "gradient_penalty_mode": "ALL", "synth_data": 2, "bias_lr_mul": 1.0, "bias_weight_decay": 0.05, "loss_balancer_beta": 0.75, "loss_balancer_r": 0.96, "tf_pma_low_exp_2": 4, "dataset_size_exp_2": 11, "batch_size_exp_2": 1, "epochs": 100, "lr_mul": 0.06, "n_warmup_steps": 80, "Optim": "diffgrad", "fixed_role_model": "tvae", "mse_mag_target": 0.5, "g_loss_mul": 0.2, "d_model_exp_2": 9, "attn_activation": "selu", "tf_d_inner_exp_2": 8, "tf_n_layers_enc": 4, "tf_n_head_exp_2": 5, "tf_activation": "leakyhardtanh", "tf_activation_final": "leakyhardtanh", "tf_num_inds_exp_2": 5, "ada_d_hid_exp_2": 11, "ada_n_layers": 6, "ada_activation": "leakyhardtanh", "ada_activation_final": "leakyhardsigmoid", "head_d_hid_exp_2": 7, "head_n_layers": 7, "head_n_head_exp_2": 6, "head_activation": "leakyhardsigmoid", "head_activation_final": "leakyhardsigmoid", "mse_mag": true, "mse_mag_multiply": true, "models": ["tvae"], "max_seconds": 3600}