{
  "trainer": {
    "trainer_class": "BatchTopKTrainer",
    "dict_class": "BatchTopKSAE",
    "lr": 0.0001,
    "steps": 244140,
    "auxk_alpha": 0.03125,
    "warmup_steps": 1000,
    "decay_start": 195312,
    "threshold_beta": 0.999,
    "threshold_start_step": 1000,
    "top_k_aux": 2048,
    "seed": 0,
    "activation_dim": 4096,
    "dict_size": 131072,
    "k": 64,
    "device": "cuda",
    "layer": 19,
    "lm_name": "meta-llama/Llama-3.1-8B-Instruct",
    "wandb_name": "3_l19-BatchTopKTrainer-meta-llama/Llama-3.1-8B-Instruct-resid_post_layer_19_trainer_1",
    "submodule_name": "resid_post_layer_19"
  },
  "buffer": {
    "d_submodule": 4096,
    "io": "out",
    "n_ctxs": 244,
    "ctx_len": 1024,
    "refresh_batch_size": 16,
    "out_batch_size": 2048,
    "device": "cuda",
    "internal_device": "cuda"
  }
}