Jkatzy's picture
Upload APMAE
6ca6643 verified
{
"_name_or_path": "runs/f6oomb7y/SavedModels/Model_e1_300000",
"ap_mae_preload_name": null,
"architectures": [
"APMAE"
],
"attention_scaler": "log_normalize",
"base_learning_rate": 0.00015,
"correct_only": true,
"dataset_location": "fahamu/ioi",
"dataset_name": "",
"dataset_split_seed": 42,
"dataset_test_split": "train[1000000:1010000]",
"dataset_train_split": "train[0:1000000]",
"decoder_device": "cuda:0",
"decoder_dim": 512,
"decoder_dim_head": 64,
"decoder_heads": 8,
"decoder_layers": 8,
"decoder_mlp_dim": 2048,
"encoder_device": "cuda:0",
"encoder_dim": 512,
"encoder_dim_head": 64,
"encoder_dropout": 0.0,
"encoder_emb_dropout": 0.0,
"encoder_heads": 16,
"encoder_layers": 24,
"encoder_mlp_dim": 2048,
"encoder_pool": "cls",
"hf_home": "./huggingface",
"hidden_act": "gelu",
"initial_seed": 45,
"iter_loader_workers": 8,
"lang": "java",
"layer_norm_eps": 1e-12,
"mask_ratio": 0.5,
"max_epochs": 1,
"max_length": 32,
"min_length": 16,
"model_type": "ap_mae",
"patch_size": 2,
"qkv_bias": false,
"queries": [
"ABBA",
"ABAB"
],
"save_model_frequency": 15000,
"target_model_device": "cuda:0",
"target_model_name": "openai-community/gpt2",
"test_batch_size": 1,
"test_head_selection_strategy": "all",
"torch_dtype": "float32",
"train_batch_size": 60,
"train_batches": 150000,
"train_head_selection_strategy": [
"layerwise",
1
],
"transformers_version": "4.47.1",
"val_batches": 3840,
"visualize_frequency": 2000,
"visualize_norm": null
}