Phi0503HMA11OLD / adapter_config.json
Litzy619's picture
End of training
0e3e8c3 verified
{
"adaptive_ratio": 0.01,
"adaptive_ratio_decay": 1.0,
"additive_modeling": false,
"allow_empty_lora": true,
"auto_mapping": null,
"base_model_name_or_path": "microsoft/Phi-3-mini-4k-instruct",
"bias": "none",
"curr_learning": true,
"detached_training": true,
"dynamic_adapter_pool": true,
"enable_lora": null,
"encoder_hidden_size": 3072,
"fan_in_fan_out": false,
"hypernetwork": true,
"inference_mode": true,
"input_based_adapter_selection": true,
"insert_zero_lora": false,
"layer_to_lora": [],
"lora_alpha": 16,
"lora_dropout": 0.05,
"merge_weights": false,
"modules_to_save": null,
"num_attention_heads": 32,
"num_layers": 32,
"num_prefix_set": 3,
"num_transformer_submodules": 1,
"num_virtual_tokens": 30,
"number_of_adapter_pre_layer": 8,
"ot_diversified_dispatcher": false,
"ot_diversified_prefix": false,
"peft_type": "PREFIX_MA_LORA",
"pool_selective_inference": true,
"pool_selective_training": true,
"prefix_projection": true,
"r": 8,
"random_routing": false,
"random_routing_inference": false,
"scale": 64,
"selective_num": 8,
"simple_hidden_matching": false,
"simple_instance_matching": true,
"target_modules": [
"qkv_proj",
"o_proj"
],
"task_type": "CAUSAL_LM",
"token_dim": 3072
}