|
{ |
|
"args": { |
|
"adam_epsilon": 1e-08, |
|
"alpha_ce": 0.1, |
|
"alpha_distil": 0.9, |
|
"ampere_learning_rate": 0.01, |
|
"ampere_mask_init": "constant", |
|
"ampere_mask_scale": 0.0, |
|
"ampere_pruning_method": "disabled", |
|
"cache_dir": "", |
|
"config_name": "", |
|
"data_dir": "squad_data", |
|
"do_eval": true, |
|
"do_lower_case": true, |
|
"do_train": true, |
|
"doc_stride": 128, |
|
"eval_all_checkpoints": true, |
|
"eval_batch_size": 16, |
|
"evaluate_during_training": false, |
|
"final_ampere_temperature": 20, |
|
"final_lambda": 100, |
|
"final_shuffling_temperature": 20, |
|
"final_threshold": 0.1, |
|
"final_warmup": 10, |
|
"fp16": false, |
|
"fp16_opt_level": "O1", |
|
"global_topk": false, |
|
"global_topk_frequency_compute": 25, |
|
"gradient_accumulation_steps": 1, |
|
"in_shuffling_group": 4, |
|
"initial_ampere_temperature": 0.0, |
|
"initial_shuffling_temperature": 0.1, |
|
"initial_threshold": 0.0, |
|
"initial_warmup": 1, |
|
"lang_id": 0, |
|
"learning_rate": 3e-05, |
|
"local_rank": -1, |
|
"logging_steps": 500, |
|
"mask_block_cols": 32, |
|
"mask_block_rows": 32, |
|
"mask_init": "constant", |
|
"mask_scale": 0.0, |
|
"mask_scores_learning_rate": 0.01, |
|
"max_answer_length": 30, |
|
"max_grad_norm": 1.0, |
|
"max_query_length": 64, |
|
"max_seq_length": 384, |
|
"max_steps": -1, |
|
"model_name_or_path": "bert-base-uncased", |
|
"model_type": "masked_bert", |
|
"n_best_size": 20, |
|
"n_gpu": 1, |
|
"no_cuda": false, |
|
"null_score_diff_threshold": 0.0, |
|
"num_train_epochs": 20.0, |
|
"out_shuffling_group": 4, |
|
"overwrite_cache": false, |
|
"overwrite_output_dir": true, |
|
"per_gpu_eval_batch_size": 16, |
|
"per_gpu_train_batch_size": 16, |
|
"predict_file": "dev-v1.1.json", |
|
"pruning_method": "sigmoied_threshold", |
|
"pruning_submethod": "default", |
|
"regularization": "l1", |
|
"save_steps": 5000, |
|
"seed": 42, |
|
"server_ip": "", |
|
"server_port": "", |
|
"shuffling_learning_rate": 0.001, |
|
"shuffling_method": "disabled", |
|
"teacher_name_or_path": "csarron/bert-base-uncased-squad-v1", |
|
"teacher_type": "bert", |
|
"temperature": 2.0, |
|
"threads": 8, |
|
"tokenizer_name": "", |
|
"train_batch_size": 16, |
|
"train_file": "train-v1.1.json", |
|
"truncate_train_examples": -1, |
|
"verbose_logging": false, |
|
"version_2_with_negative": false, |
|
"warmup_steps": 5400, |
|
"weight_decay": 0.0 |
|
}, |
|
"config": { |
|
"_name_or_path": "bert-base-uncased", |
|
"ampere_mask_init": "constant", |
|
"ampere_mask_scale": 0.0, |
|
"ampere_pruning_method": "disabled", |
|
"architectures": ["MaskedBertForQuestionAnswering"], |
|
"attention_probs_dropout_prob": 0.1, |
|
"hidden_act": "gelu", |
|
"hidden_dropout_prob": 0.1, |
|
"hidden_size": 768, |
|
"in_shuffling_group": 4, |
|
"initializer_range": 0.02, |
|
"intermediate_size": 3072, |
|
"layer_norm_eps": 1e-12, |
|
"mask_block_cols": 32, |
|
"mask_block_rows": 32, |
|
"mask_init": "constant", |
|
"mask_scale": 0.0, |
|
"max_position_embeddings": 512, |
|
"model_type": "masked_bert", |
|
"num_attention_heads": 12, |
|
"num_hidden_layers": 12, |
|
"out_shuffling_group": 4, |
|
"pad_token_id": 0, |
|
"pruning_method": "sigmoied_threshold", |
|
"pruning_submethod": "default", |
|
"shuffling_method": "disabled", |
|
"type_vocab_size": 2, |
|
"vocab_size": 30522 |
|
}, |
|
"packaging": { |
|
"model_name": "madlag/bert-base-uncased-squad1.1-block-sparse-0.13-v1", |
|
"model_owner": "madlag", |
|
"pytorch_final_file_size": 359300023 |
|
}, |
|
"performance": { |
|
"dense": { |
|
"eval_elapsed_time": 43.292644419998396 |
|
}, |
|
"pytorch_block_sparse": { |
|
"eval_elapsed_time": 26.211968197021633 |
|
}, |
|
"speedup": 1.6516365384922744 |
|
}, |
|
"precision": { |
|
"exact": 74.38978576660156, |
|
"f1": 83.25814819335938 |
|
}, |
|
"sparsity": { |
|
"ampere": false, |
|
"block_size": [32, 32], |
|
"block_sparse": true, |
|
"block_sparse_density": 0.12510850694444445, |
|
"block_sparse_nnz": 10377, |
|
"block_sparse_total": 82944, |
|
"global_density": 0.32128202590889765, |
|
"is_block_sparse_valid": true, |
|
"nnz_parameters": 35175170, |
|
"parameters": 109483778, |
|
"pruned_heads": { |
|
"0": [0, 1, 2, 4, 5, 6, 7, 9, 11], |
|
"1": [0, 1, 2, 3, 5, 6, 7, 8, 9], |
|
"2": [1, 2, 3, 4, 5, 7, 8, 10, 11], |
|
"3": [2, 3, 4, 6, 7, 10], |
|
"4": [0, 1, 2, 4, 6, 7, 8, 10, 11], |
|
"5": [0, 1, 2, 4, 5, 6, 7, 11], |
|
"6": [0, 2, 3, 4, 6, 7, 10], |
|
"7": [1, 2, 3, 5, 6, 7, 11], |
|
"8": [0, 1, 2, 3, 4, 5, 6, 7, 8], |
|
"9": [1, 3, 4, 5, 7, 9, 10], |
|
"10": [0, 1, 2, 4, 5, 6, 7, 8, 9], |
|
"11": [0, 2, 3, 5, 7, 8, 10, 11] |
|
}, |
|
"total_attention_heads": 144, |
|
"total_pruned_attention_heads": 97 |
|
} |
|
} |