canrager committed on Jan 16

Commit

18190d5

verified ·

1 Parent(s): c69040d

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_10/ae.pt +3 -0
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_10/config.json +32 -0
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_10/eval_results.json +1 -0
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_11/ae.pt +3 -0
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_11/config.json +32 -0
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_11/eval_results.json +1 -0
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_6/ae.pt +3 -0
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_6/config.json +32 -0
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_6/eval_results.json +1 -0
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_7/ae.pt +3 -0
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_7/config.json +32 -0
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_7/eval_results.json +1 -0
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_8/ae.pt +3 -0
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_8/config.json +32 -0
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_8/eval_results.json +1 -0
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_9/ae.pt +3 -0
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_9/config.json +32 -0
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_9/eval_results.json +1 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_0/ae.pt +3 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_0/config.json +31 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_0/eval_results.json +1 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_1/ae.pt +3 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_1/config.json +31 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_1/eval_results.json +1 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_2/ae.pt +3 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_2/config.json +31 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_2/eval_results.json +1 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_3/ae.pt +3 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_3/config.json +31 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_3/eval_results.json +1 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_4/ae.pt +3 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_4/config.json +31 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_4/eval_results.json +1 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_5/ae.pt +3 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_5/config.json +31 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_5/eval_results.json +1 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_0/ae.pt +3 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_0/config.json +31 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_244/ae.pt +3 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_244/config.json +31 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_2441/ae.pt +3 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_2441/config.json +31 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_24414/ae.pt +3 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_24414/config.json +31 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_772/ae.pt +3 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_772/config.json +31 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_7720/ae.pt +3 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_7720/config.json +31 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_77203/ae.pt +3 -0
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_77203/config.json +31 -0

gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_10/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bc8f9d716c77c9ae84b25674b076b969f4e52947c94b3a450f4214bfe26f37be
+size 302066710

gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_10/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+    "trainer": {
+        "trainer_class": "BatchTopKTrainer",
+        "dict_class": "BatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 1152,
+        "seed": 0,
+        "activation_dim": 2304,
+        "dict_size": 16384,
+        "k": 320,
+        "device": "cuda:0",
+        "layer": 12,
+        "lm_name": "google/gemma-2-2b",
+        "wandb_name": "BatchTopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_10",
+        "submodule_name": "resid_post_layer_12"
+    },
+    "buffer": {
+        "d_submodule": 2304,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_10/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 42.79875, "l1_loss": 1562.96, "l0": 321.2723876953125, "frac_variance_explained": 0.90958984375, "cossim": 0.953125, "l2_ratio": 0.95298828125, "relative_reconstruction_bias": 1.00021484375, "loss_original": 2.152294921875, "loss_reconstructed": 2.18591796875, "loss_zero": 12.4375, "frac_recovered": 0.996171875, "frac_alive": 0.96368408203125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_11/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fe22ddf8d9511da263f3106bc3fbaf500b6dc0701f6c1085d5f7b47ac6415447
+size 302066710

gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_11/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+    "trainer": {
+        "trainer_class": "BatchTopKTrainer",
+        "dict_class": "BatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 1152,
+        "seed": 0,
+        "activation_dim": 2304,
+        "dict_size": 16384,
+        "k": 640,
+        "device": "cuda:0",
+        "layer": 12,
+        "lm_name": "google/gemma-2-2b",
+        "wandb_name": "BatchTopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_11",
+        "submodule_name": "resid_post_layer_12"
+    },
+    "buffer": {
+        "d_submodule": 2304,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_11/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 34.8, "l1_loss": 2747.28, "l0": 642.0887109375, "frac_variance_explained": 0.94068359375, "cossim": 0.968359375, "l2_ratio": 0.9684765625, "relative_reconstruction_bias": 0.999453125, "loss_original": 2.152294921875, "loss_reconstructed": 2.16748046875, "loss_zero": 12.4375, "frac_recovered": 0.99841796875, "frac_alive": 0.83428955078125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_6/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:01638d64af64ef161f923e2ca4d20742581452606d7cb0f491d66c146ed1962a
+size 302066710

gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_6/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+    "trainer": {
+        "trainer_class": "BatchTopKTrainer",
+        "dict_class": "BatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 1152,
+        "seed": 0,
+        "activation_dim": 2304,
+        "dict_size": 16384,
+        "k": 20,
+        "device": "cuda:0",
+        "layer": 12,
+        "lm_name": "google/gemma-2-2b",
+        "wandb_name": "BatchTopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_6",
+        "submodule_name": "resid_post_layer_12"
+    },
+    "buffer": {
+        "d_submodule": 2304,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_6/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 64.55875, "l1_loss": 286.01, "l0": 19.994365234375, "frac_variance_explained": 0.790859375, "cossim": 0.89044921875, "l2_ratio": 0.890546875, "relative_reconstruction_bias": 1.00109375, "loss_original": 2.152294921875, "loss_reconstructed": 2.385322265625, "loss_zero": 12.4375, "frac_recovered": 0.97697265625, "frac_alive": 0.97857666015625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_7/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5477b613d15f981e2767354521bafe5cbc85e33189b6064b0a327d0ff40f04bd
+size 302066710

gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_7/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+    "trainer": {
+        "trainer_class": "BatchTopKTrainer",
+        "dict_class": "BatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 1152,
+        "seed": 0,
+        "activation_dim": 2304,
+        "dict_size": 16384,
+        "k": 40,
+        "device": "cuda:0",
+        "layer": 12,
+        "lm_name": "google/gemma-2-2b",
+        "wandb_name": "BatchTopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_7",
+        "submodule_name": "resid_post_layer_12"
+    },
+    "buffer": {
+        "d_submodule": 2304,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_7/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 59.685, "l1_loss": 387.52, "l0": 39.811513671875, "frac_variance_explained": 0.82166015625, "cossim": 0.90671875, "l2_ratio": 0.90642578125, "relative_reconstruction_bias": 1.000546875, "loss_original": 2.152294921875, "loss_reconstructed": 2.2889453125, "loss_zero": 12.4375, "frac_recovered": 0.98634765625, "frac_alive": 0.98846435546875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_8/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7dab4effd204e72a9f6df3c3ba4ae455f86864b10c10541f26f83cb8ed961a49
+size 302066710

gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_8/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+    "trainer": {
+        "trainer_class": "BatchTopKTrainer",
+        "dict_class": "BatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 1152,
+        "seed": 0,
+        "activation_dim": 2304,
+        "dict_size": 16384,
+        "k": 80,
+        "device": "cuda:0",
+        "layer": 12,
+        "lm_name": "google/gemma-2-2b",
+        "wandb_name": "BatchTopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_8",
+        "submodule_name": "resid_post_layer_12"
+    },
+    "buffer": {
+        "d_submodule": 2304,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_8/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 54.58, "l1_loss": 543.6, "l0": 80.091689453125, "frac_variance_explained": 0.8526171875, "cossim": 0.9221484375, "l2_ratio": 0.92234375, "relative_reconstruction_bias": 1.00125, "loss_original": 2.152294921875, "loss_reconstructed": 2.23978515625, "loss_zero": 12.4375, "frac_recovered": 0.9907421875, "frac_alive": 0.99298095703125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_9/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fcb46b6ec70f4b971faf416bceb715ba7aeba7850241111a62255128c3d57f13
+size 302066710

gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_9/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+    "trainer": {
+        "trainer_class": "BatchTopKTrainer",
+        "dict_class": "BatchTopKSAE",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 1152,
+        "seed": 0,
+        "activation_dim": 2304,
+        "dict_size": 16384,
+        "k": 160,
+        "device": "cuda:0",
+        "layer": 12,
+        "lm_name": "google/gemma-2-2b",
+        "wandb_name": "BatchTopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_9",
+        "submodule_name": "resid_post_layer_12"
+    },
+    "buffer": {
+        "d_submodule": 2304,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_9/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 49.27125, "l1_loss": 898.26, "l0": 158.950009765625, "frac_variance_explained": 0.88154296875, "cossim": 0.93748046875, "l2_ratio": 0.93708984375, "relative_reconstruction_bias": 1.00015625, "loss_original": 2.152294921875, "loss_reconstructed": 2.206025390625, "loss_zero": 12.4375, "frac_recovered": 0.99396484375, "frac_alive": 0.9896240234375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_0/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6b59deb52ed3542b238759a5c63a5d13efdada046e8be6fd08e77987cf0a9ccd
+size 302066710

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_0/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+    "trainer": {
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 0,
+        "activation_dim": 2304,
+        "dict_size": 16384,
+        "k": 20,
+        "device": "cuda:0",
+        "layer": 12,
+        "lm_name": "google/gemma-2-2b",
+        "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_0",
+        "submodule_name": "resid_post_layer_12"
+    },
+    "buffer": {
+        "d_submodule": 2304,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_0/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 64.96625, "l1_loss": 280.28, "l0": 19.997119140625, "frac_variance_explained": 0.78849609375, "cossim": 0.88927734375, "l2_ratio": 0.889609375, "relative_reconstruction_bias": 1.0008203125, "loss_original": 2.152294921875, "loss_reconstructed": 2.403408203125, "loss_zero": 12.4375, "frac_recovered": 0.9754296875, "frac_alive": 0.949951171875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_1/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4b0757ae9e45d28c0f15487b22b32bd47e8a2f3ebfb7f3ef2c6ed01dcf848de0
+size 302066710

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_1/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+    "trainer": {
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 0,
+        "activation_dim": 2304,
+        "dict_size": 16384,
+        "k": 40,
+        "device": "cuda:0",
+        "layer": 12,
+        "lm_name": "google/gemma-2-2b",
+        "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_1",
+        "submodule_name": "resid_post_layer_12"
+    },
+    "buffer": {
+        "d_submodule": 2304,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_1/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 59.7275, "l1_loss": 382.27, "l0": 39.99611328125, "frac_variance_explained": 0.81900390625, "cossim": 0.90693359375, "l2_ratio": 0.90705078125, "relative_reconstruction_bias": 1.00095703125, "loss_original": 2.152294921875, "loss_reconstructed": 2.30181640625, "loss_zero": 12.4375, "frac_recovered": 0.985234375, "frac_alive": 0.98614501953125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_2/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:683749246f05b505533f29b04e9f04f4e2260dacb0c6e329422ed5a2934992f6
+size 302066710

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_2/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+    "trainer": {
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 0,
+        "activation_dim": 2304,
+        "dict_size": 16384,
+        "k": 80,
+        "device": "cuda:0",
+        "layer": 12,
+        "lm_name": "google/gemma-2-2b",
+        "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_2",
+        "submodule_name": "resid_post_layer_12"
+    },
+    "buffer": {
+        "d_submodule": 2304,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_2/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 54.65125, "l1_loss": 529.98, "l0": 79.9989892578125, "frac_variance_explained": 0.84908203125, "cossim": 0.92232421875, "l2_ratio": 0.9225, "relative_reconstruction_bias": 1.00234375, "loss_original": 2.152294921875, "loss_reconstructed": 2.249189453125, "loss_zero": 12.4375, "frac_recovered": 0.99015625, "frac_alive": 0.9962158203125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_3/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:203a4f1acc8fc820a165a1245e7344959a603c0b2da8b9655b151c06ebba51d8
+size 302066710

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_3/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+    "trainer": {
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 0,
+        "activation_dim": 2304,
+        "dict_size": 16384,
+        "k": 160,
+        "device": "cuda:0",
+        "layer": 12,
+        "lm_name": "google/gemma-2-2b",
+        "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_3",
+        "submodule_name": "resid_post_layer_12"
+    },
+    "buffer": {
+        "d_submodule": 2304,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_3/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 49.2875, "l1_loss": 773.14, "l0": 159.99974609375, "frac_variance_explained": 0.87669921875, "cossim": 0.9375, "l2_ratio": 0.93748046875, "relative_reconstruction_bias": 0.99919921875, "loss_original": 2.152294921875, "loss_reconstructed": 2.212333984375, "loss_zero": 12.4375, "frac_recovered": 0.9933984375, "frac_alive": 0.99652099609375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_4/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:384f7d4facce581a40eb20bf342fea7bef2a6e416b7b39532485155cd916bab3
+size 302066710

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_4/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+    "trainer": {
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 0,
+        "activation_dim": 2304,
+        "dict_size": 16384,
+        "k": 320,
+        "device": "cuda:0",
+        "layer": 12,
+        "lm_name": "google/gemma-2-2b",
+        "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_4",
+        "submodule_name": "resid_post_layer_12"
+    },
+    "buffer": {
+        "d_submodule": 2304,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_4/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 42.815, "l1_loss": 1410.92, "l0": 319.9121923828125, "frac_variance_explained": 0.90875, "cossim": 0.953125, "l2_ratio": 0.953125, "relative_reconstruction_bias": 1.00017578125, "loss_original": 2.152294921875, "loss_reconstructed": 2.18783203125, "loss_zero": 12.4375, "frac_recovered": 0.99615234375, "frac_alive": 0.99505615234375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_5/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8f6efe1d15269f95285203ba07c8b2872302e3494e98e57b3ba098883ff9df8a
+size 302066710

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_5/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+    "trainer": {
+        "trainer_class": "TopKTrainer",
+        "dict_class": "AutoEncoderTopK",
+        "lr": 0.0003,
+        "steps": 244140,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 195312,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "seed": 0,
+        "activation_dim": 2304,
+        "dict_size": 16384,
+        "k": 640,
+        "device": "cuda:0",
+        "layer": 12,
+        "lm_name": "google/gemma-2-2b",
+        "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_5",
+        "submodule_name": "resid_post_layer_12"
+    },
+    "buffer": {
+        "d_submodule": 2304,
+        "io": "out",
+        "n_ctxs": 244,
+        "ctx_len": 1024,
+        "refresh_batch_size": 4,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_5/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"l2_loss": 34.06125, "l1_loss": 2503.6, "l0": 639.193583984375, "frac_variance_explained": 0.94197265625, "cossim": 0.97115234375, "l2_ratio": 0.9715625, "relative_reconstruction_bias": 1.0005859375, "loss_original": 2.152294921875, "loss_reconstructed": 2.167255859375, "loss_zero": 12.4375, "frac_recovered": 0.998359375, "frac_alive": 0.95263671875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_0/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:131924249230717015dfb802e9b890d7fec441de39c2d79d5b786d2571a8d689
+size 302066858

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_0/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "trainer": {
+    "trainer_class": "TopKTrainer",
+    "dict_class": "AutoEncoderTopK",
+    "lr": 0.0003,
+    "steps": "0",
+    "auxk_alpha": 0.03125,
+    "warmup_steps": 1000,
+    "decay_start": 195312,
+    "threshold_beta": 0.999,
+    "threshold_start_step": 1000,
+    "seed": 0,
+    "activation_dim": 2304,
+    "dict_size": 16384,
+    "k": 20,
+    "device": "cuda:0",
+    "layer": 12,
+    "lm_name": "google/gemma-2-2b",
+    "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_0",
+    "submodule_name": "resid_post_layer_12"
+  },
+  "buffer": {
+    "d_submodule": 2304,
+    "io": "out",
+    "n_ctxs": 244,
+    "ctx_len": 1024,
+    "refresh_batch_size": 4,
+    "out_batch_size": 2048,
+    "device": "cuda:0"
+  }
+}

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_244/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:715c667b68c8e965808597605943b7d4f28345c10f385bf7a12b469e3dd0d042
+size 302066878

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_244/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "trainer": {
+    "trainer_class": "TopKTrainer",
+    "dict_class": "AutoEncoderTopK",
+    "lr": 0.0003,
+    "steps": "244",
+    "auxk_alpha": 0.03125,
+    "warmup_steps": 1000,
+    "decay_start": 195312,
+    "threshold_beta": 0.999,
+    "threshold_start_step": 1000,
+    "seed": 0,
+    "activation_dim": 2304,
+    "dict_size": 16384,
+    "k": 20,
+    "device": "cuda:0",
+    "layer": 12,
+    "lm_name": "google/gemma-2-2b",
+    "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_0",
+    "submodule_name": "resid_post_layer_12"
+  },
+  "buffer": {
+    "d_submodule": 2304,
+    "io": "out",
+    "n_ctxs": 244,
+    "ctx_len": 1024,
+    "refresh_batch_size": 4,
+    "out_batch_size": 2048,
+    "device": "cuda:0"
+  }
+}

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_2441/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c8e60b992aa98c678742d578f0790d9a8d3bfbec909a75497a8811da3f560581
+size 302066952

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_2441/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "trainer": {
+    "trainer_class": "TopKTrainer",
+    "dict_class": "AutoEncoderTopK",
+    "lr": 0.0003,
+    "steps": "2441",
+    "auxk_alpha": 0.03125,
+    "warmup_steps": 1000,
+    "decay_start": 195312,
+    "threshold_beta": 0.999,
+    "threshold_start_step": 1000,
+    "seed": 0,
+    "activation_dim": 2304,
+    "dict_size": 16384,
+    "k": 20,
+    "device": "cuda:0",
+    "layer": 12,
+    "lm_name": "google/gemma-2-2b",
+    "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_0",
+    "submodule_name": "resid_post_layer_12"
+  },
+  "buffer": {
+    "d_submodule": 2304,
+    "io": "out",
+    "n_ctxs": 244,
+    "ctx_len": 1024,
+    "refresh_batch_size": 4,
+    "out_batch_size": 2048,
+    "device": "cuda:0"
+  }
+}

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_24414/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7b800c3e165fb8c2cf46b642132cb882a4700dccbbf51a5db26a9f9137945204
+size 302067154

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_24414/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "trainer": {
+    "trainer_class": "TopKTrainer",
+    "dict_class": "AutoEncoderTopK",
+    "lr": 0.0003,
+    "steps": "24414",
+    "auxk_alpha": 0.03125,
+    "warmup_steps": 1000,
+    "decay_start": 195312,
+    "threshold_beta": 0.999,
+    "threshold_start_step": 1000,
+    "seed": 0,
+    "activation_dim": 2304,
+    "dict_size": 16384,
+    "k": 20,
+    "device": "cuda:0",
+    "layer": 12,
+    "lm_name": "google/gemma-2-2b",
+    "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_0",
+    "submodule_name": "resid_post_layer_12"
+  },
+  "buffer": {
+    "d_submodule": 2304,
+    "io": "out",
+    "n_ctxs": 244,
+    "ctx_len": 1024,
+    "refresh_batch_size": 4,
+    "out_batch_size": 2048,
+    "device": "cuda:0"
+  }
+}

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_772/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3dd0e312fb5074492dec14c4f40c927633d9da963728f71b634e5b5d64a117d8
+size 302066878

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_772/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "trainer": {
+    "trainer_class": "TopKTrainer",
+    "dict_class": "AutoEncoderTopK",
+    "lr": 0.0003,
+    "steps": "772",
+    "auxk_alpha": 0.03125,
+    "warmup_steps": 1000,
+    "decay_start": 195312,
+    "threshold_beta": 0.999,
+    "threshold_start_step": 1000,
+    "seed": 0,
+    "activation_dim": 2304,
+    "dict_size": 16384,
+    "k": 20,
+    "device": "cuda:0",
+    "layer": 12,
+    "lm_name": "google/gemma-2-2b",
+    "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_0",
+    "submodule_name": "resid_post_layer_12"
+  },
+  "buffer": {
+    "d_submodule": 2304,
+    "io": "out",
+    "n_ctxs": 244,
+    "ctx_len": 1024,
+    "refresh_batch_size": 4,
+    "out_batch_size": 2048,
+    "device": "cuda:0"
+  }
+}

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_7720/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8571f0c90e6b64b8f0d72aadcc8c4d233c46d587ac1c4265534dafa606654b73
+size 302066952

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_7720/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "trainer": {
+    "trainer_class": "TopKTrainer",
+    "dict_class": "AutoEncoderTopK",
+    "lr": 0.0003,
+    "steps": "7720",
+    "auxk_alpha": 0.03125,
+    "warmup_steps": 1000,
+    "decay_start": 195312,
+    "threshold_beta": 0.999,
+    "threshold_start_step": 1000,
+    "seed": 0,
+    "activation_dim": 2304,
+    "dict_size": 16384,
+    "k": 20,
+    "device": "cuda:0",
+    "layer": 12,
+    "lm_name": "google/gemma-2-2b",
+    "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_0",
+    "submodule_name": "resid_post_layer_12"
+  },
+  "buffer": {
+    "d_submodule": 2304,
+    "io": "out",
+    "n_ctxs": 244,
+    "ctx_len": 1024,
+    "refresh_batch_size": 4,
+    "out_batch_size": 2048,
+    "device": "cuda:0"
+  }
+}

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_77203/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7568f021895dbdb9f70072dfe678cf488797d0bc4ffbeeacc4d662c0f7b08f1d
+size 302067154

gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_77203/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "trainer": {
+    "trainer_class": "TopKTrainer",
+    "dict_class": "AutoEncoderTopK",
+    "lr": 0.0003,
+    "steps": "77203",
+    "auxk_alpha": 0.03125,
+    "warmup_steps": 1000,
+    "decay_start": 195312,
+    "threshold_beta": 0.999,
+    "threshold_start_step": 1000,
+    "seed": 0,
+    "activation_dim": 2304,
+    "dict_size": 16384,
+    "k": 20,
+    "device": "cuda:0",
+    "layer": 12,
+    "lm_name": "google/gemma-2-2b",
+    "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_0",
+    "submodule_name": "resid_post_layer_12"
+  },
+  "buffer": {
+    "d_submodule": 2304,
+    "io": "out",
+    "n_ctxs": 244,
+    "ctx_len": 1024,
+    "refresh_batch_size": 4,
+    "out_batch_size": 2048,
+    "device": "cuda:0"
+  }
+}