{
"device": "cuda:0",
"seed": 42,
"dtype": "torch.bfloat16",
"hook_point_in": "blocks.29.hook_attn_out",
"hook_point_out": "blocks.29.hook_attn_out",
"use_decoder_bias": true,
"apply_decoder_bias_to_pre_encoder": false,
"expansion_factor": 32,
"d_model": 4096,
"d_sae": 131072,
"bias_init_method": "all_zero",
"act_fn": "jumprelu",
"jump_relu_threshold": 0.2314453125,
"norm_activation": "dataset-wise",
"dataset_average_activation_norm": {
"in": 7.46875,
"out": 7.46875
},
"decoder_exactly_fixed_norm": false,
"sparsity_include_decoder_norm": true,
"use_glu_encoder": false,
"init_decoder_norm": 0.5,
"init_encoder_norm": null,
"init_encoder_with_decoder_transpose": true,
"lp": 1,
"l1_coefficient": 8e-05,
"l1_coefficient_warmup_steps": 78125,
"top_k": 50,
"k_warmup_steps": 78125,
"use_batch_norm_mse": true,
"use_ghost_grads": false,
"tp_size": 1,
"ddp_size": 1
}