{
  "ampere_pruning_method": "disabled",
  "attention_block_cols": 32,
  "attention_block_rows": 32,
  "attention_lambda": 1.0,
  "attention_output_with_dense": 0,
  "attention_pruning_method": "sigmoied_threshold",
  "bias_mask": true,
  "dense_block_cols": 1,
  "dense_block_rows": 1,
  "dense_lambda": 1.0,
  "dense_pruning_method": "sigmoied_threshold:1d_alt",
  "distil_alpha_ce": 0.1,
  "distil_alpha_teacher": 0.9,
  "distil_teacher_name_or_path": "bert-large-uncased-whole-word-masking-finetuned-squad",
  "distil_temperature": 2.0,
  "final_ampere_temperature": 20.0,
  "final_finetune": false,
  "final_threshold": 0.1,
  "final_warmup": 10,
  "initial_ampere_temperature": 0.0,
  "initial_threshold": 0,
  "initial_warmup": 1,
  "mask_init": "constant",
  "mask_scale": 0.0,
  "mask_scores_learning_rate": 0.01,
  "regularization": "l1",
  "regularization_final_lambda": 20
}