canrager committed on
Commit 18190d5 · verified · 1 Parent(s): c69040d

Add files using upload-large-folder tool

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_10/ae.pt +3 -0
  2. gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_10/config.json +32 -0
  3. gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_10/eval_results.json +1 -0
  4. gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_11/ae.pt +3 -0
  5. gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_11/config.json +32 -0
  6. gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_11/eval_results.json +1 -0
  7. gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_6/ae.pt +3 -0
  8. gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_6/config.json +32 -0
  9. gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_6/eval_results.json +1 -0
  10. gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_7/ae.pt +3 -0
  11. gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_7/config.json +32 -0
  12. gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_7/eval_results.json +1 -0
  13. gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_8/ae.pt +3 -0
  14. gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_8/config.json +32 -0
  15. gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_8/eval_results.json +1 -0
  16. gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_9/ae.pt +3 -0
  17. gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_9/config.json +32 -0
  18. gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_9/eval_results.json +1 -0
  19. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_0/ae.pt +3 -0
  20. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_0/config.json +31 -0
  21. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_0/eval_results.json +1 -0
  22. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_1/ae.pt +3 -0
  23. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_1/config.json +31 -0
  24. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_1/eval_results.json +1 -0
  25. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_2/ae.pt +3 -0
  26. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_2/config.json +31 -0
  27. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_2/eval_results.json +1 -0
  28. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_3/ae.pt +3 -0
  29. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_3/config.json +31 -0
  30. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_3/eval_results.json +1 -0
  31. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_4/ae.pt +3 -0
  32. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_4/config.json +31 -0
  33. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_4/eval_results.json +1 -0
  34. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_5/ae.pt +3 -0
  35. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_5/config.json +31 -0
  36. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_5/eval_results.json +1 -0
  37. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_0/ae.pt +3 -0
  38. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_0/config.json +31 -0
  39. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_244/ae.pt +3 -0
  40. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_244/config.json +31 -0
  41. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_2441/ae.pt +3 -0
  42. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_2441/config.json +31 -0
  43. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_24414/ae.pt +3 -0
  44. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_24414/config.json +31 -0
  45. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_772/ae.pt +3 -0
  46. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_772/config.json +31 -0
  47. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_7720/ae.pt +3 -0
  48. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_7720/config.json +31 -0
  49. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_77203/ae.pt +3 -0
  50. gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_77203/config.json +31 -0
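Every ae.pt below is stored as a Git LFS pointer, so the diff shows only pointer metadata, not weights. A minimal sketch of fetching one trainer's files with huggingface_hub — the repo_id is a placeholder for whichever repository this commit belongs to, and revision="18190d5" pins the commit shown above:

```python
from huggingface_hub import hf_hub_download

REPO_ID = "user/repo"  # placeholder: substitute the repository containing this commit

# hf_hub_download resolves LFS pointers to the real files automatically.
config_path = hf_hub_download(
    repo_id=REPO_ID,
    filename="gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_0/config.json",
    revision="18190d5",  # the commit shown above
)
ae_path = hf_hub_download(
    repo_id=REPO_ID,
    filename="gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_0/ae.pt",
    revision="18190d5",
)
print(config_path, ae_path)
```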
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_10/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bc8f9d716c77c9ae84b25674b076b969f4e52947c94b3a450f4214bfe26f37be
+ size 302066710
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_10/config.json ADDED
@@ -0,0 +1,32 @@
+ {
+     "trainer": {
+         "trainer_class": "BatchTopKTrainer",
+         "dict_class": "BatchTopKSAE",
+         "lr": 0.0003,
+         "steps": 244140,
+         "auxk_alpha": 0.03125,
+         "warmup_steps": 1000,
+         "decay_start": 195312,
+         "threshold_beta": 0.999,
+         "threshold_start_step": 1000,
+         "top_k_aux": 1152,
+         "seed": 0,
+         "activation_dim": 2304,
+         "dict_size": 16384,
+         "k": 320,
+         "device": "cuda:0",
+         "layer": 12,
+         "lm_name": "google/gemma-2-2b",
+         "wandb_name": "BatchTopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_10",
+         "submodule_name": "resid_post_layer_12"
+     },
+     "buffer": {
+         "d_submodule": 2304,
+         "io": "out",
+         "n_ctxs": 244,
+         "ctx_len": 1024,
+         "refresh_batch_size": 4,
+         "out_batch_size": 2048,
+         "device": "cuda:0"
+     }
+ }
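Each config pairs a trainer block with an activation-buffer block. A minimal sketch of reading it next to the matching ae.pt, assuming ae.pt holds a plain state_dict (the SAE class itself, BatchTopKSAE, comes from the training codebase and is not defined in this commit):

```python
import json
import torch

run_dir = "gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_10"

with open(f"{run_dir}/config.json") as f:
    cfg = json.load(f)

trainer = cfg["trainer"]
print(trainer["dict_class"])      # "BatchTopKSAE"
print(trainer["activation_dim"])  # 2304, the Gemma-2-2B residual stream width
print(trainer["dict_size"])       # 16384 == 2**14, matching "width-2pow14" in the path
print(trainer["k"])               # 320 active latents per token on average (batch-level top-k)

# Assumption: ae.pt stores the autoencoder's state_dict; map to CPU to inspect shapes.
state = torch.load(f"{run_dir}/ae.pt", map_location="cpu")
for name, tensor in state.items():
    print(name, tuple(tensor.shape))
```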
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_10/eval_results.json ADDED
@@ -0,0 +1 @@
+ {"l2_loss": 42.79875, "l1_loss": 1562.96, "l0": 321.2723876953125, "frac_variance_explained": 0.90958984375, "cossim": 0.953125, "l2_ratio": 0.95298828125, "relative_reconstruction_bias": 1.00021484375, "loss_original": 2.152294921875, "loss_reconstructed": 2.18591796875, "loss_zero": 12.4375, "frac_recovered": 0.996171875, "frac_alive": 0.96368408203125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_11/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fe22ddf8d9511da263f3106bc3fbaf500b6dc0701f6c1085d5f7b47ac6415447
+ size 302066710
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_11/config.json ADDED
@@ -0,0 +1,32 @@
+ {
+     "trainer": {
+         "trainer_class": "BatchTopKTrainer",
+         "dict_class": "BatchTopKSAE",
+         "lr": 0.0003,
+         "steps": 244140,
+         "auxk_alpha": 0.03125,
+         "warmup_steps": 1000,
+         "decay_start": 195312,
+         "threshold_beta": 0.999,
+         "threshold_start_step": 1000,
+         "top_k_aux": 1152,
+         "seed": 0,
+         "activation_dim": 2304,
+         "dict_size": 16384,
+         "k": 640,
+         "device": "cuda:0",
+         "layer": 12,
+         "lm_name": "google/gemma-2-2b",
+         "wandb_name": "BatchTopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_11",
+         "submodule_name": "resid_post_layer_12"
+     },
+     "buffer": {
+         "d_submodule": 2304,
+         "io": "out",
+         "n_ctxs": 244,
+         "ctx_len": 1024,
+         "refresh_batch_size": 4,
+         "out_batch_size": 2048,
+         "device": "cuda:0"
+     }
+ }
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_11/eval_results.json ADDED
@@ -0,0 +1 @@
+ {"l2_loss": 34.8, "l1_loss": 2747.28, "l0": 642.0887109375, "frac_variance_explained": 0.94068359375, "cossim": 0.968359375, "l2_ratio": 0.9684765625, "relative_reconstruction_bias": 0.999453125, "loss_original": 2.152294921875, "loss_reconstructed": 2.16748046875, "loss_zero": 12.4375, "frac_recovered": 0.99841796875, "frac_alive": 0.83428955078125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_6/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:01638d64af64ef161f923e2ca4d20742581452606d7cb0f491d66c146ed1962a
+ size 302066710
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_6/config.json ADDED
@@ -0,0 +1,32 @@
+ {
+     "trainer": {
+         "trainer_class": "BatchTopKTrainer",
+         "dict_class": "BatchTopKSAE",
+         "lr": 0.0003,
+         "steps": 244140,
+         "auxk_alpha": 0.03125,
+         "warmup_steps": 1000,
+         "decay_start": 195312,
+         "threshold_beta": 0.999,
+         "threshold_start_step": 1000,
+         "top_k_aux": 1152,
+         "seed": 0,
+         "activation_dim": 2304,
+         "dict_size": 16384,
+         "k": 20,
+         "device": "cuda:0",
+         "layer": 12,
+         "lm_name": "google/gemma-2-2b",
+         "wandb_name": "BatchTopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_6",
+         "submodule_name": "resid_post_layer_12"
+     },
+     "buffer": {
+         "d_submodule": 2304,
+         "io": "out",
+         "n_ctxs": 244,
+         "ctx_len": 1024,
+         "refresh_batch_size": 4,
+         "out_batch_size": 2048,
+         "device": "cuda:0"
+     }
+ }
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_6/eval_results.json ADDED
@@ -0,0 +1 @@
+ {"l2_loss": 64.55875, "l1_loss": 286.01, "l0": 19.994365234375, "frac_variance_explained": 0.790859375, "cossim": 0.89044921875, "l2_ratio": 0.890546875, "relative_reconstruction_bias": 1.00109375, "loss_original": 2.152294921875, "loss_reconstructed": 2.385322265625, "loss_zero": 12.4375, "frac_recovered": 0.97697265625, "frac_alive": 0.97857666015625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_7/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5477b613d15f981e2767354521bafe5cbc85e33189b6064b0a327d0ff40f04bd
+ size 302066710
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_7/config.json ADDED
@@ -0,0 +1,32 @@
+ {
+     "trainer": {
+         "trainer_class": "BatchTopKTrainer",
+         "dict_class": "BatchTopKSAE",
+         "lr": 0.0003,
+         "steps": 244140,
+         "auxk_alpha": 0.03125,
+         "warmup_steps": 1000,
+         "decay_start": 195312,
+         "threshold_beta": 0.999,
+         "threshold_start_step": 1000,
+         "top_k_aux": 1152,
+         "seed": 0,
+         "activation_dim": 2304,
+         "dict_size": 16384,
+         "k": 40,
+         "device": "cuda:0",
+         "layer": 12,
+         "lm_name": "google/gemma-2-2b",
+         "wandb_name": "BatchTopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_7",
+         "submodule_name": "resid_post_layer_12"
+     },
+     "buffer": {
+         "d_submodule": 2304,
+         "io": "out",
+         "n_ctxs": 244,
+         "ctx_len": 1024,
+         "refresh_batch_size": 4,
+         "out_batch_size": 2048,
+         "device": "cuda:0"
+     }
+ }
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_7/eval_results.json ADDED
@@ -0,0 +1 @@
+ {"l2_loss": 59.685, "l1_loss": 387.52, "l0": 39.811513671875, "frac_variance_explained": 0.82166015625, "cossim": 0.90671875, "l2_ratio": 0.90642578125, "relative_reconstruction_bias": 1.000546875, "loss_original": 2.152294921875, "loss_reconstructed": 2.2889453125, "loss_zero": 12.4375, "frac_recovered": 0.98634765625, "frac_alive": 0.98846435546875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_8/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7dab4effd204e72a9f6df3c3ba4ae455f86864b10c10541f26f83cb8ed961a49
+ size 302066710
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_8/config.json ADDED
@@ -0,0 +1,32 @@
+ {
+     "trainer": {
+         "trainer_class": "BatchTopKTrainer",
+         "dict_class": "BatchTopKSAE",
+         "lr": 0.0003,
+         "steps": 244140,
+         "auxk_alpha": 0.03125,
+         "warmup_steps": 1000,
+         "decay_start": 195312,
+         "threshold_beta": 0.999,
+         "threshold_start_step": 1000,
+         "top_k_aux": 1152,
+         "seed": 0,
+         "activation_dim": 2304,
+         "dict_size": 16384,
+         "k": 80,
+         "device": "cuda:0",
+         "layer": 12,
+         "lm_name": "google/gemma-2-2b",
+         "wandb_name": "BatchTopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_8",
+         "submodule_name": "resid_post_layer_12"
+     },
+     "buffer": {
+         "d_submodule": 2304,
+         "io": "out",
+         "n_ctxs": 244,
+         "ctx_len": 1024,
+         "refresh_batch_size": 4,
+         "out_batch_size": 2048,
+         "device": "cuda:0"
+     }
+ }
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_8/eval_results.json ADDED
@@ -0,0 +1 @@
+ {"l2_loss": 54.58, "l1_loss": 543.6, "l0": 80.091689453125, "frac_variance_explained": 0.8526171875, "cossim": 0.9221484375, "l2_ratio": 0.92234375, "relative_reconstruction_bias": 1.00125, "loss_original": 2.152294921875, "loss_reconstructed": 2.23978515625, "loss_zero": 12.4375, "frac_recovered": 0.9907421875, "frac_alive": 0.99298095703125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_9/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fcb46b6ec70f4b971faf416bceb715ba7aeba7850241111a62255128c3d57f13
+ size 302066710
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_9/config.json ADDED
@@ -0,0 +1,32 @@
+ {
+     "trainer": {
+         "trainer_class": "BatchTopKTrainer",
+         "dict_class": "BatchTopKSAE",
+         "lr": 0.0003,
+         "steps": 244140,
+         "auxk_alpha": 0.03125,
+         "warmup_steps": 1000,
+         "decay_start": 195312,
+         "threshold_beta": 0.999,
+         "threshold_start_step": 1000,
+         "top_k_aux": 1152,
+         "seed": 0,
+         "activation_dim": 2304,
+         "dict_size": 16384,
+         "k": 160,
+         "device": "cuda:0",
+         "layer": 12,
+         "lm_name": "google/gemma-2-2b",
+         "wandb_name": "BatchTopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_9",
+         "submodule_name": "resid_post_layer_12"
+     },
+     "buffer": {
+         "d_submodule": 2304,
+         "io": "out",
+         "n_ctxs": 244,
+         "ctx_len": 1024,
+         "refresh_batch_size": 4,
+         "out_batch_size": 2048,
+         "device": "cuda:0"
+     }
+ }
gemma-2-2b_batch_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_9/eval_results.json ADDED
@@ -0,0 +1 @@
+ {"l2_loss": 49.27125, "l1_loss": 898.26, "l0": 158.950009765625, "frac_variance_explained": 0.88154296875, "cossim": 0.93748046875, "l2_ratio": 0.93708984375, "relative_reconstruction_bias": 1.00015625, "loss_original": 2.152294921875, "loss_reconstructed": 2.206025390625, "loss_zero": 12.4375, "frac_recovered": 0.99396484375, "frac_alive": 0.9896240234375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6b59deb52ed3542b238759a5c63a5d13efdada046e8be6fd08e77987cf0a9ccd
+ size 302066710
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_0/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+     "trainer": {
+         "trainer_class": "TopKTrainer",
+         "dict_class": "AutoEncoderTopK",
+         "lr": 0.0003,
+         "steps": 244140,
+         "auxk_alpha": 0.03125,
+         "warmup_steps": 1000,
+         "decay_start": 195312,
+         "threshold_beta": 0.999,
+         "threshold_start_step": 1000,
+         "seed": 0,
+         "activation_dim": 2304,
+         "dict_size": 16384,
+         "k": 20,
+         "device": "cuda:0",
+         "layer": 12,
+         "lm_name": "google/gemma-2-2b",
+         "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_0",
+         "submodule_name": "resid_post_layer_12"
+     },
+     "buffer": {
+         "d_submodule": 2304,
+         "io": "out",
+         "n_ctxs": 244,
+         "ctx_len": 1024,
+         "refresh_batch_size": 4,
+         "out_batch_size": 2048,
+         "device": "cuda:0"
+     }
+ }
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_0/eval_results.json ADDED
@@ -0,0 +1 @@
+ {"l2_loss": 64.96625, "l1_loss": 280.28, "l0": 19.997119140625, "frac_variance_explained": 0.78849609375, "cossim": 0.88927734375, "l2_ratio": 0.889609375, "relative_reconstruction_bias": 1.0008203125, "loss_original": 2.152294921875, "loss_reconstructed": 2.403408203125, "loss_zero": 12.4375, "frac_recovered": 0.9754296875, "frac_alive": 0.949951171875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4b0757ae9e45d28c0f15487b22b32bd47e8a2f3ebfb7f3ef2c6ed01dcf848de0
+ size 302066710
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_1/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+     "trainer": {
+         "trainer_class": "TopKTrainer",
+         "dict_class": "AutoEncoderTopK",
+         "lr": 0.0003,
+         "steps": 244140,
+         "auxk_alpha": 0.03125,
+         "warmup_steps": 1000,
+         "decay_start": 195312,
+         "threshold_beta": 0.999,
+         "threshold_start_step": 1000,
+         "seed": 0,
+         "activation_dim": 2304,
+         "dict_size": 16384,
+         "k": 40,
+         "device": "cuda:0",
+         "layer": 12,
+         "lm_name": "google/gemma-2-2b",
+         "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_1",
+         "submodule_name": "resid_post_layer_12"
+     },
+     "buffer": {
+         "d_submodule": 2304,
+         "io": "out",
+         "n_ctxs": 244,
+         "ctx_len": 1024,
+         "refresh_batch_size": 4,
+         "out_batch_size": 2048,
+         "device": "cuda:0"
+     }
+ }
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_1/eval_results.json ADDED
@@ -0,0 +1 @@
+ {"l2_loss": 59.7275, "l1_loss": 382.27, "l0": 39.99611328125, "frac_variance_explained": 0.81900390625, "cossim": 0.90693359375, "l2_ratio": 0.90705078125, "relative_reconstruction_bias": 1.00095703125, "loss_original": 2.152294921875, "loss_reconstructed": 2.30181640625, "loss_zero": 12.4375, "frac_recovered": 0.985234375, "frac_alive": 0.98614501953125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:683749246f05b505533f29b04e9f04f4e2260dacb0c6e329422ed5a2934992f6
+ size 302066710
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_2/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+     "trainer": {
+         "trainer_class": "TopKTrainer",
+         "dict_class": "AutoEncoderTopK",
+         "lr": 0.0003,
+         "steps": 244140,
+         "auxk_alpha": 0.03125,
+         "warmup_steps": 1000,
+         "decay_start": 195312,
+         "threshold_beta": 0.999,
+         "threshold_start_step": 1000,
+         "seed": 0,
+         "activation_dim": 2304,
+         "dict_size": 16384,
+         "k": 80,
+         "device": "cuda:0",
+         "layer": 12,
+         "lm_name": "google/gemma-2-2b",
+         "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_2",
+         "submodule_name": "resid_post_layer_12"
+     },
+     "buffer": {
+         "d_submodule": 2304,
+         "io": "out",
+         "n_ctxs": 244,
+         "ctx_len": 1024,
+         "refresh_batch_size": 4,
+         "out_batch_size": 2048,
+         "device": "cuda:0"
+     }
+ }
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_2/eval_results.json ADDED
@@ -0,0 +1 @@
+ {"l2_loss": 54.65125, "l1_loss": 529.98, "l0": 79.9989892578125, "frac_variance_explained": 0.84908203125, "cossim": 0.92232421875, "l2_ratio": 0.9225, "relative_reconstruction_bias": 1.00234375, "loss_original": 2.152294921875, "loss_reconstructed": 2.249189453125, "loss_zero": 12.4375, "frac_recovered": 0.99015625, "frac_alive": 0.9962158203125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:203a4f1acc8fc820a165a1245e7344959a603c0b2da8b9655b151c06ebba51d8
+ size 302066710
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_3/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+     "trainer": {
+         "trainer_class": "TopKTrainer",
+         "dict_class": "AutoEncoderTopK",
+         "lr": 0.0003,
+         "steps": 244140,
+         "auxk_alpha": 0.03125,
+         "warmup_steps": 1000,
+         "decay_start": 195312,
+         "threshold_beta": 0.999,
+         "threshold_start_step": 1000,
+         "seed": 0,
+         "activation_dim": 2304,
+         "dict_size": 16384,
+         "k": 160,
+         "device": "cuda:0",
+         "layer": 12,
+         "lm_name": "google/gemma-2-2b",
+         "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_3",
+         "submodule_name": "resid_post_layer_12"
+     },
+     "buffer": {
+         "d_submodule": 2304,
+         "io": "out",
+         "n_ctxs": 244,
+         "ctx_len": 1024,
+         "refresh_batch_size": 4,
+         "out_batch_size": 2048,
+         "device": "cuda:0"
+     }
+ }
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_3/eval_results.json ADDED
@@ -0,0 +1 @@
+ {"l2_loss": 49.2875, "l1_loss": 773.14, "l0": 159.99974609375, "frac_variance_explained": 0.87669921875, "cossim": 0.9375, "l2_ratio": 0.93748046875, "relative_reconstruction_bias": 0.99919921875, "loss_original": 2.152294921875, "loss_reconstructed": 2.212333984375, "loss_zero": 12.4375, "frac_recovered": 0.9933984375, "frac_alive": 0.99652099609375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:384f7d4facce581a40eb20bf342fea7bef2a6e416b7b39532485155cd916bab3
+ size 302066710
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_4/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+     "trainer": {
+         "trainer_class": "TopKTrainer",
+         "dict_class": "AutoEncoderTopK",
+         "lr": 0.0003,
+         "steps": 244140,
+         "auxk_alpha": 0.03125,
+         "warmup_steps": 1000,
+         "decay_start": 195312,
+         "threshold_beta": 0.999,
+         "threshold_start_step": 1000,
+         "seed": 0,
+         "activation_dim": 2304,
+         "dict_size": 16384,
+         "k": 320,
+         "device": "cuda:0",
+         "layer": 12,
+         "lm_name": "google/gemma-2-2b",
+         "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_4",
+         "submodule_name": "resid_post_layer_12"
+     },
+     "buffer": {
+         "d_submodule": 2304,
+         "io": "out",
+         "n_ctxs": 244,
+         "ctx_len": 1024,
+         "refresh_batch_size": 4,
+         "out_batch_size": 2048,
+         "device": "cuda:0"
+     }
+ }
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_4/eval_results.json ADDED
@@ -0,0 +1 @@
+ {"l2_loss": 42.815, "l1_loss": 1410.92, "l0": 319.9121923828125, "frac_variance_explained": 0.90875, "cossim": 0.953125, "l2_ratio": 0.953125, "relative_reconstruction_bias": 1.00017578125, "loss_original": 2.152294921875, "loss_reconstructed": 2.18783203125, "loss_zero": 12.4375, "frac_recovered": 0.99615234375, "frac_alive": 0.99505615234375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8f6efe1d15269f95285203ba07c8b2872302e3494e98e57b3ba098883ff9df8a
+ size 302066710
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_5/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+     "trainer": {
+         "trainer_class": "TopKTrainer",
+         "dict_class": "AutoEncoderTopK",
+         "lr": 0.0003,
+         "steps": 244140,
+         "auxk_alpha": 0.03125,
+         "warmup_steps": 1000,
+         "decay_start": 195312,
+         "threshold_beta": 0.999,
+         "threshold_start_step": 1000,
+         "seed": 0,
+         "activation_dim": 2304,
+         "dict_size": 16384,
+         "k": 640,
+         "device": "cuda:0",
+         "layer": 12,
+         "lm_name": "google/gemma-2-2b",
+         "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_5",
+         "submodule_name": "resid_post_layer_12"
+     },
+     "buffer": {
+         "d_submodule": 2304,
+         "io": "out",
+         "n_ctxs": 244,
+         "ctx_len": 1024,
+         "refresh_batch_size": 4,
+         "out_batch_size": 2048,
+         "device": "cuda:0"
+     }
+ }
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12/trainer_5/eval_results.json ADDED
@@ -0,0 +1 @@
+ {"l2_loss": 34.06125, "l1_loss": 2503.6, "l0": 639.193583984375, "frac_variance_explained": 0.94197265625, "cossim": 0.97115234375, "l2_ratio": 0.9715625, "relative_reconstruction_bias": 1.0005859375, "loss_original": 2.152294921875, "loss_reconstructed": 2.167255859375, "loss_zero": 12.4375, "frac_recovered": 0.998359375, "frac_alive": 0.95263671875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_0/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:131924249230717015dfb802e9b890d7fec441de39c2d79d5b786d2571a8d689
+ size 302066858
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_0/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+     "trainer": {
+         "trainer_class": "TopKTrainer",
+         "dict_class": "AutoEncoderTopK",
+         "lr": 0.0003,
+         "steps": "0",
+         "auxk_alpha": 0.03125,
+         "warmup_steps": 1000,
+         "decay_start": 195312,
+         "threshold_beta": 0.999,
+         "threshold_start_step": 1000,
+         "seed": 0,
+         "activation_dim": 2304,
+         "dict_size": 16384,
+         "k": 20,
+         "device": "cuda:0",
+         "layer": 12,
+         "lm_name": "google/gemma-2-2b",
+         "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_0",
+         "submodule_name": "resid_post_layer_12"
+     },
+     "buffer": {
+         "d_submodule": 2304,
+         "io": "out",
+         "n_ctxs": 244,
+         "ctx_len": 1024,
+         "refresh_batch_size": 4,
+         "out_batch_size": 2048,
+         "device": "cuda:0"
+     }
+ }
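The checkpoint directories appear to be log-spaced over the 244140-step run; a quick check reproduces the step numbers seen in the checkpoint paths (an observation about the naming, not something stated in the configs):

```python
total_steps = 244140
# 10**(-3 + i/2) for i = 0..5 gives fractions 1e-3, 1e-2.5, ..., 1e-0.5 of training.
checkpoint_steps = [0] + [round(total_steps * 10 ** (-3 + i / 2)) for i in range(6)]
print(checkpoint_steps)  # [0, 244, 772, 2441, 7720, 24414, 77203]
```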
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_244/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:715c667b68c8e965808597605943b7d4f28345c10f385bf7a12b469e3dd0d042
+ size 302066878
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_244/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+     "trainer": {
+         "trainer_class": "TopKTrainer",
+         "dict_class": "AutoEncoderTopK",
+         "lr": 0.0003,
+         "steps": "244",
+         "auxk_alpha": 0.03125,
+         "warmup_steps": 1000,
+         "decay_start": 195312,
+         "threshold_beta": 0.999,
+         "threshold_start_step": 1000,
+         "seed": 0,
+         "activation_dim": 2304,
+         "dict_size": 16384,
+         "k": 20,
+         "device": "cuda:0",
+         "layer": 12,
+         "lm_name": "google/gemma-2-2b",
+         "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_0",
+         "submodule_name": "resid_post_layer_12"
+     },
+     "buffer": {
+         "d_submodule": 2304,
+         "io": "out",
+         "n_ctxs": 244,
+         "ctx_len": 1024,
+         "refresh_batch_size": 4,
+         "out_batch_size": 2048,
+         "device": "cuda:0"
+     }
+ }
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_2441/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c8e60b992aa98c678742d578f0790d9a8d3bfbec909a75497a8811da3f560581
+ size 302066952
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_2441/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+     "trainer": {
+         "trainer_class": "TopKTrainer",
+         "dict_class": "AutoEncoderTopK",
+         "lr": 0.0003,
+         "steps": "2441",
+         "auxk_alpha": 0.03125,
+         "warmup_steps": 1000,
+         "decay_start": 195312,
+         "threshold_beta": 0.999,
+         "threshold_start_step": 1000,
+         "seed": 0,
+         "activation_dim": 2304,
+         "dict_size": 16384,
+         "k": 20,
+         "device": "cuda:0",
+         "layer": 12,
+         "lm_name": "google/gemma-2-2b",
+         "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_0",
+         "submodule_name": "resid_post_layer_12"
+     },
+     "buffer": {
+         "d_submodule": 2304,
+         "io": "out",
+         "n_ctxs": 244,
+         "ctx_len": 1024,
+         "refresh_batch_size": 4,
+         "out_batch_size": 2048,
+         "device": "cuda:0"
+     }
+ }
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_24414/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7b800c3e165fb8c2cf46b642132cb882a4700dccbbf51a5db26a9f9137945204
+ size 302067154
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_24414/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+     "trainer": {
+         "trainer_class": "TopKTrainer",
+         "dict_class": "AutoEncoderTopK",
+         "lr": 0.0003,
+         "steps": "24414",
+         "auxk_alpha": 0.03125,
+         "warmup_steps": 1000,
+         "decay_start": 195312,
+         "threshold_beta": 0.999,
+         "threshold_start_step": 1000,
+         "seed": 0,
+         "activation_dim": 2304,
+         "dict_size": 16384,
+         "k": 20,
+         "device": "cuda:0",
+         "layer": 12,
+         "lm_name": "google/gemma-2-2b",
+         "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_0",
+         "submodule_name": "resid_post_layer_12"
+     },
+     "buffer": {
+         "d_submodule": 2304,
+         "io": "out",
+         "n_ctxs": 244,
+         "ctx_len": 1024,
+         "refresh_batch_size": 4,
+         "out_batch_size": 2048,
+         "device": "cuda:0"
+     }
+ }
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_772/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3dd0e312fb5074492dec14c4f40c927633d9da963728f71b634e5b5d64a117d8
+ size 302066878
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_772/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+     "trainer": {
+         "trainer_class": "TopKTrainer",
+         "dict_class": "AutoEncoderTopK",
+         "lr": 0.0003,
+         "steps": "772",
+         "auxk_alpha": 0.03125,
+         "warmup_steps": 1000,
+         "decay_start": 195312,
+         "threshold_beta": 0.999,
+         "threshold_start_step": 1000,
+         "seed": 0,
+         "activation_dim": 2304,
+         "dict_size": 16384,
+         "k": 20,
+         "device": "cuda:0",
+         "layer": 12,
+         "lm_name": "google/gemma-2-2b",
+         "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_0",
+         "submodule_name": "resid_post_layer_12"
+     },
+     "buffer": {
+         "d_submodule": 2304,
+         "io": "out",
+         "n_ctxs": 244,
+         "ctx_len": 1024,
+         "refresh_batch_size": 4,
+         "out_batch_size": 2048,
+         "device": "cuda:0"
+     }
+ }
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_7720/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8571f0c90e6b64b8f0d72aadcc8c4d233c46d587ac1c4265534dafa606654b73
+ size 302066952
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_7720/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+     "trainer": {
+         "trainer_class": "TopKTrainer",
+         "dict_class": "AutoEncoderTopK",
+         "lr": 0.0003,
+         "steps": "7720",
+         "auxk_alpha": 0.03125,
+         "warmup_steps": 1000,
+         "decay_start": 195312,
+         "threshold_beta": 0.999,
+         "threshold_start_step": 1000,
+         "seed": 0,
+         "activation_dim": 2304,
+         "dict_size": 16384,
+         "k": 20,
+         "device": "cuda:0",
+         "layer": 12,
+         "lm_name": "google/gemma-2-2b",
+         "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_0",
+         "submodule_name": "resid_post_layer_12"
+     },
+     "buffer": {
+         "d_submodule": 2304,
+         "io": "out",
+         "n_ctxs": 244,
+         "ctx_len": 1024,
+         "refresh_batch_size": 4,
+         "out_batch_size": 2048,
+         "device": "cuda:0"
+     }
+ }
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_77203/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7568f021895dbdb9f70072dfe678cf488797d0bc4ffbeeacc4d662c0f7b08f1d
+ size 302067154
gemma-2-2b_top_k_width-2pow14_date-0107/resid_post_layer_12_checkpoints/trainer_0_step_77203/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+     "trainer": {
+         "trainer_class": "TopKTrainer",
+         "dict_class": "AutoEncoderTopK",
+         "lr": 0.0003,
+         "steps": "77203",
+         "auxk_alpha": 0.03125,
+         "warmup_steps": 1000,
+         "decay_start": 195312,
+         "threshold_beta": 0.999,
+         "threshold_start_step": 1000,
+         "seed": 0,
+         "activation_dim": 2304,
+         "dict_size": 16384,
+         "k": 20,
+         "device": "cuda:0",
+         "layer": 12,
+         "lm_name": "google/gemma-2-2b",
+         "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_0",
+         "submodule_name": "resid_post_layer_12"
+     },
+     "buffer": {
+         "d_submodule": 2304,
+         "io": "out",
+         "n_ctxs": 244,
+         "ctx_len": 1024,
+         "refresh_batch_size": 4,
+         "out_batch_size": 2048,
+         "device": "cuda:0"
+     }
+ }