{
  "_name_or_path": "backup/snowy-yogurt-6/",
  "adapt_attn_span": 32,
  "adapt_bs": 384,
  "adapt_dynamic": false,
  "adapt_nb_heads": 12,
  "adapt_reg": false,
  "adapt_span_cache": false,
  "adapt_span_loss_coeff": 0.01,
  "adapt_span_ramp": 16,
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.05,
  "attention_probs_dropout_prob_noise": 0.155,
  "attention_probs_dropout_prob_reference": 0.1,
  "classifier_dropout": null,
  "dynamic_ramp": false,
  "gradient_checkpointing": false,
  "hack": false,
  "headmask": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.05,
  "hidden_dropout_prob_noise": 0.155,
  "hidden_dropout_prob_reference": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "layerdrop": 0.0,
  "layerdrop_dynamic": false,
  "layerdrop_manifold": null,
  "layerdrop_noise": false,
  "layerdrop_noise_std": 0.1,
  "max_position_embeddings": 512,
  "mixup": false,
  "mixup_highest_layer": 12,
  "mixup_index": -99,
  "mixup_lowest_layer": 9,
  "mixup_manifold": null,
  "mixup_noise": false,
  "mixup_rate": [
    0.0,
    0.0,
    0.0
  ],
  "mixup_same_sample": false,
  "mixup_samples": [
    0,
    1,
    1
  ],
  "mixup_strategy1": [
    0,
    1,
    0
  ],
  "mixup_strategy2": [
    0,
    0,
    0
  ],
  "model_type": "bert",
  "multi_dropout": false,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "single_dropout": false,
  "task_alpha": 1.0,
  "task_beta": 1.05,
  "task_lambda": 0.013,
  "torch_dtype": "float32",
  "transformers_version": "4.10.0.dev0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}