jbloom's picture
Uploaded SAE weights such that loading won't require old code.
5bd69d8 verified
raw
history blame
1.28 kB
{"model_name": "gpt2-small", "hook_point": "blocks.11.hook_resid_pre", "hook_point_layer": 11, "hook_point_head_index": null, "dataset_path": "Skylion007/openwebtext", "is_dataset_tokenized": false, "context_size": 128, "use_cached_activations": false, "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.11.hook_resid_pre", "d_in": 768, "n_batches_in_buffer": 128, "total_training_tokens": 300000000, "store_batch_size": 32, "device": "mps", "seed": 42, "dtype": "torch.float32", "b_dec_init_method": "geometric_median", "expansion_factor": 32, "from_pretrained_path": null, "l1_coefficient": 8e-05, "lr": 0.0004, "lr_scheduler_name": null, "lr_warm_up_steps": 5000, "train_batch_size": 4096, "use_ghost_grads": false, "feature_sampling_window": 1000, "feature_sampling_method": null, "resample_batches": 1028, "feature_reinit_scale": 0.2, "dead_feature_window": 5000, "dead_feature_estimation_method": "no_fire", "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_gpt2_small_resid_pre_5", "wandb_entity": null, "wandb_log_frequency": 100, "n_checkpoints": 10, "checkpoint_path": "checkpoints/gf296egd", "d_sae": 24576, "tokens_per_buffer": 67108864, "run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08"}