{
  "hook_point_in": "blocks.0.hook_attn_out",
  "hook_point_out": "blocks.0.hook_attn_out",
  "use_decoder_bias": true,
  "apply_decoder_bias_to_pre_encoder": false,
  "expansion_factor": 32,
  "d_model": 4096,
  "d_sae": 131072,
  "norm_activation": "token-wise",
  "dataset_average_activation_norm": null,
  "decoder_exactly_fixed_norm": false,
  "sparsity_include_decoder_norm": true,
  "use_glu_encoder": false,
  "init_decoder_norm": null,
  "init_encoder_norm": null,
  "init_encoder_with_decoder_transpose": true,
  "l1_coefficient": 1.6e-05,
  "l1_coefficient_warmup_steps": 29296,
  "lp": 1,
  "use_ghost_grads": false,
  "tp_size": 1,
  "ddp_size": 1
}