canrager committed on
Commit e8eec3b · verified · 1 parent: 4eca020

Upload folder using huggingface_hub

Files changed (24)
  1. pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_0/ae.pt +3 -0
  2. pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_0/config.json +26 -0
  3. pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_1/ae.pt +3 -0
  4. pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_1/config.json +26 -0
  5. pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_10/ae.pt +3 -0
  6. pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_10/config.json +26 -0
  7. pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_11/ae.pt +3 -0
  8. pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_11/config.json +26 -0
  9. pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_2/ae.pt +3 -0
  10. pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_2/config.json +26 -0
  11. pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_3/ae.pt +3 -0
  12. pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_3/config.json +26 -0
  13. pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_4/ae.pt +3 -0
  14. pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_4/config.json +26 -0
  15. pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_5/ae.pt +3 -0
  16. pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_5/config.json +26 -0
  17. pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_6/ae.pt +3 -0
  18. pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_6/config.json +26 -0
  19. pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_7/ae.pt +3 -0
  20. pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_7/config.json +26 -0
  21. pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_8/ae.pt +3 -0
  22. pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_8/config.json +26 -0
  23. pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_9/ae.pt +3 -0
  24. pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_9/config.json +26 -0
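Each of the twelve trainer_N directories pairs a Git LFS pointer for the checkpoint (ae.pt) with the config.json that produced it. Below is a minimal sketch of inspecting one such pair after the folder has been fetched locally (for example with huggingface_hub); the local path and the assumption that ae.pt is a torch-serialized state dict are illustrative and not confirmed by this diff.

import json
import torch

# Hypothetical local path: wherever this repo folder was downloaded to; adjust as needed.
base = "pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_0"

# The training configuration stored next to the checkpoint.
with open(f"{base}/config.json") as f:
    cfg = json.load(f)
print(cfg["trainer"]["dict_class"], cfg["trainer"]["dict_size"], cfg["trainer"]["k"])

# The checkpoint itself; assumed here to be a torch-serialized state dict
# (the diff only shows the LFS pointer, not the payload format).
state = torch.load(f"{base}/ae.pt", map_location="cpu")
if isinstance(state, dict):
    for name, value in state.items():
        if hasattr(value, "shape"):
            print(name, tuple(value.shape))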
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1431c749ac52b47aff72000c918904485faf4b2a0ea86e51ebefb53fa087da12
+ size 67178280
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_0/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopKAdditivity",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0002,
+     "steps": 48828,
+     "seed": 0,
+     "activation_dim": 512,
+     "dict_size": 16384,
+     "k": 20,
+     "device": "cuda:0",
+     "layer": 4,
+     "lm_name": "EleutherAI/pythia-70m-deduped",
+     "wandb_name": "TopKTrainer_Additivity-EleutherAI/pythia-70m-deduped-resid_post_layer_4-add_coeff-0.0",
+     "submodule_name": "resid_post_layer_4"
+   },
+   "buffer": {
+     "d_submodule": 512,
+     "io": "out",
+     "n_ctxs": 30000,
+     "ctx_len": 128,
+     "refresh_batch_size": 32,
+     "out_batch_size": 2048,
+     "device": "cuda:0"
+   }
+ }
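The "dict_class": "AutoEncoderTopK" entry denotes a Top-K sparse autoencoder: a 512-dimensional residual-stream activation ("activation_dim") is encoded into a 16384-dimensional latent ("dict_size", the 2pow14 in the folder name), only the k largest latents are kept active, and the decoder reconstructs the activation from that sparse code. Across trainer_0 through trainer_11 the configs differ only in "k" (20, 50, 125, 313) and the add_coeff recorded in "wandb_name" (0.0, 0.01, 1.0). The sketch below illustrates that forward pass with trainer_0's dimensions; it is an illustrative stand-in, not the dictionary_learning implementation these checkpoints were actually trained with.

import torch
import torch.nn as nn

class TopKAutoEncoderSketch(nn.Module):
    # Illustrative Top-K SAE: encode, keep only the k largest latents, decode.
    def __init__(self, activation_dim: int = 512, dict_size: int = 16384, k: int = 20):
        super().__init__()
        self.k = k
        self.encoder = nn.Linear(activation_dim, dict_size)
        self.decoder = nn.Linear(dict_size, activation_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        latents = torch.relu(self.encoder(x))            # (batch, dict_size)
        topk = torch.topk(latents, k=self.k, dim=-1)     # k largest per example
        sparse = torch.zeros_like(latents)
        sparse.scatter_(-1, topk.indices, topk.values)   # zero everything else
        return self.decoder(sparse)                      # reconstructed activation

# Shapes matching trainer_0: 512-d activations, 16384 latents, k = 20.
sae = TopKAutoEncoderSketch(512, 16384, 20)
print(sae(torch.randn(8, 512)).shape)  # torch.Size([8, 512])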
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:26ec0ec6ecae0b9f5c5752e9622bf4c57d25f598eb28ddc4bad475d168e195b7
+ size 67178280
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_1/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopKAdditivity",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0002,
+     "steps": 48828,
+     "seed": 0,
+     "activation_dim": 512,
+     "dict_size": 16384,
+     "k": 20,
+     "device": "cuda:0",
+     "layer": 4,
+     "lm_name": "EleutherAI/pythia-70m-deduped",
+     "wandb_name": "TopKTrainer_Additivity-EleutherAI/pythia-70m-deduped-resid_post_layer_4-add_coeff-0.01",
+     "submodule_name": "resid_post_layer_4"
+   },
+   "buffer": {
+     "d_submodule": 512,
+     "io": "out",
+     "n_ctxs": 30000,
+     "ctx_len": 128,
+     "refresh_batch_size": 32,
+     "out_batch_size": 2048,
+     "device": "cuda:0"
+   }
+ }
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_10/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2eb4ded88e254e23f6a988a32e191b1df9e29bc9cd28e106c5804ae4b2681fa2
+ size 67178280
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_10/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopKAdditivity",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0002,
+     "steps": 48828,
+     "seed": 0,
+     "activation_dim": 512,
+     "dict_size": 16384,
+     "k": 313,
+     "device": "cuda:0",
+     "layer": 4,
+     "lm_name": "EleutherAI/pythia-70m-deduped",
+     "wandb_name": "TopKTrainer_Additivity-EleutherAI/pythia-70m-deduped-resid_post_layer_4-add_coeff-0.01",
+     "submodule_name": "resid_post_layer_4"
+   },
+   "buffer": {
+     "d_submodule": 512,
+     "io": "out",
+     "n_ctxs": 30000,
+     "ctx_len": 128,
+     "refresh_batch_size": 32,
+     "out_batch_size": 2048,
+     "device": "cuda:0"
+   }
+ }
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_11/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9d5f7f505c3a9632060446c6464cc4f75695c47675910bb43d8571f0d691d45e
+ size 67178280
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_11/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopKAdditivity",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0002,
+     "steps": 48828,
+     "seed": 0,
+     "activation_dim": 512,
+     "dict_size": 16384,
+     "k": 313,
+     "device": "cuda:0",
+     "layer": 4,
+     "lm_name": "EleutherAI/pythia-70m-deduped",
+     "wandb_name": "TopKTrainer_Additivity-EleutherAI/pythia-70m-deduped-resid_post_layer_4-add_coeff-1.0",
+     "submodule_name": "resid_post_layer_4"
+   },
+   "buffer": {
+     "d_submodule": 512,
+     "io": "out",
+     "n_ctxs": 30000,
+     "ctx_len": 128,
+     "refresh_batch_size": 32,
+     "out_batch_size": 2048,
+     "device": "cuda:0"
+   }
+ }
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3db0023ed5735a80a7d559b6177badb6d0474032f96035decd87ab70e14cc4df
+ size 67178280
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_2/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopKAdditivity",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0002,
+     "steps": 48828,
+     "seed": 0,
+     "activation_dim": 512,
+     "dict_size": 16384,
+     "k": 20,
+     "device": "cuda:0",
+     "layer": 4,
+     "lm_name": "EleutherAI/pythia-70m-deduped",
+     "wandb_name": "TopKTrainer_Additivity-EleutherAI/pythia-70m-deduped-resid_post_layer_4-add_coeff-1.0",
+     "submodule_name": "resid_post_layer_4"
+   },
+   "buffer": {
+     "d_submodule": 512,
+     "io": "out",
+     "n_ctxs": 30000,
+     "ctx_len": 128,
+     "refresh_batch_size": 32,
+     "out_batch_size": 2048,
+     "device": "cuda:0"
+   }
+ }
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5454fa4a88fd0ce731b732568c02b3dc63dcb2ef074a46444503a5ccd3a58830
+ size 67178280
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_3/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopKAdditivity",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0002,
+     "steps": 48828,
+     "seed": 0,
+     "activation_dim": 512,
+     "dict_size": 16384,
+     "k": 50,
+     "device": "cuda:0",
+     "layer": 4,
+     "lm_name": "EleutherAI/pythia-70m-deduped",
+     "wandb_name": "TopKTrainer_Additivity-EleutherAI/pythia-70m-deduped-resid_post_layer_4-add_coeff-0.0",
+     "submodule_name": "resid_post_layer_4"
+   },
+   "buffer": {
+     "d_submodule": 512,
+     "io": "out",
+     "n_ctxs": 30000,
+     "ctx_len": 128,
+     "refresh_batch_size": 32,
+     "out_batch_size": 2048,
+     "device": "cuda:0"
+   }
+ }
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f8567d82197833fd81ba2747106ca031dfa63ca3444f9379978302fb17733858
+ size 67178280
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_4/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopKAdditivity",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0002,
+     "steps": 48828,
+     "seed": 0,
+     "activation_dim": 512,
+     "dict_size": 16384,
+     "k": 50,
+     "device": "cuda:0",
+     "layer": 4,
+     "lm_name": "EleutherAI/pythia-70m-deduped",
+     "wandb_name": "TopKTrainer_Additivity-EleutherAI/pythia-70m-deduped-resid_post_layer_4-add_coeff-0.01",
+     "submodule_name": "resid_post_layer_4"
+   },
+   "buffer": {
+     "d_submodule": 512,
+     "io": "out",
+     "n_ctxs": 30000,
+     "ctx_len": 128,
+     "refresh_batch_size": 32,
+     "out_batch_size": 2048,
+     "device": "cuda:0"
+   }
+ }
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:794d05db23050977855b50d5001b42ecd4ce6d3531e706b68376a4b02e32b837
+ size 67178280
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_5/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopKAdditivity",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0002,
+     "steps": 48828,
+     "seed": 0,
+     "activation_dim": 512,
+     "dict_size": 16384,
+     "k": 50,
+     "device": "cuda:0",
+     "layer": 4,
+     "lm_name": "EleutherAI/pythia-70m-deduped",
+     "wandb_name": "TopKTrainer_Additivity-EleutherAI/pythia-70m-deduped-resid_post_layer_4-add_coeff-1.0",
+     "submodule_name": "resid_post_layer_4"
+   },
+   "buffer": {
+     "d_submodule": 512,
+     "io": "out",
+     "n_ctxs": 30000,
+     "ctx_len": 128,
+     "refresh_batch_size": 32,
+     "out_batch_size": 2048,
+     "device": "cuda:0"
+   }
+ }
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_6/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:090a325a2cd96ce816121a4191047e7026ae3c7cb8f04f077c989c8186849de5
+ size 67178280
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_6/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopKAdditivity",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0002,
+     "steps": 48828,
+     "seed": 0,
+     "activation_dim": 512,
+     "dict_size": 16384,
+     "k": 125,
+     "device": "cuda:0",
+     "layer": 4,
+     "lm_name": "EleutherAI/pythia-70m-deduped",
+     "wandb_name": "TopKTrainer_Additivity-EleutherAI/pythia-70m-deduped-resid_post_layer_4-add_coeff-0.0",
+     "submodule_name": "resid_post_layer_4"
+   },
+   "buffer": {
+     "d_submodule": 512,
+     "io": "out",
+     "n_ctxs": 30000,
+     "ctx_len": 128,
+     "refresh_batch_size": 32,
+     "out_batch_size": 2048,
+     "device": "cuda:0"
+   }
+ }
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_7/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:82fed8eefc30988862c5010101dc1eaa35c68e814b1b19435b77cdea9edae13e
+ size 67178280
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_7/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopKAdditivity",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0002,
+     "steps": 48828,
+     "seed": 0,
+     "activation_dim": 512,
+     "dict_size": 16384,
+     "k": 125,
+     "device": "cuda:0",
+     "layer": 4,
+     "lm_name": "EleutherAI/pythia-70m-deduped",
+     "wandb_name": "TopKTrainer_Additivity-EleutherAI/pythia-70m-deduped-resid_post_layer_4-add_coeff-0.01",
+     "submodule_name": "resid_post_layer_4"
+   },
+   "buffer": {
+     "d_submodule": 512,
+     "io": "out",
+     "n_ctxs": 30000,
+     "ctx_len": 128,
+     "refresh_batch_size": 32,
+     "out_batch_size": 2048,
+     "device": "cuda:0"
+   }
+ }
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_8/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:110b0b8d59c5b0fb50b10b11df2a16814130bf8c7fe9c61348f9c69f6d1b8847
+ size 67178280
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_8/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopKAdditivity",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0002,
+     "steps": 48828,
+     "seed": 0,
+     "activation_dim": 512,
+     "dict_size": 16384,
+     "k": 125,
+     "device": "cuda:0",
+     "layer": 4,
+     "lm_name": "EleutherAI/pythia-70m-deduped",
+     "wandb_name": "TopKTrainer_Additivity-EleutherAI/pythia-70m-deduped-resid_post_layer_4-add_coeff-1.0",
+     "submodule_name": "resid_post_layer_4"
+   },
+   "buffer": {
+     "d_submodule": 512,
+     "io": "out",
+     "n_ctxs": 30000,
+     "ctx_len": 128,
+     "refresh_batch_size": 32,
+     "out_batch_size": 2048,
+     "device": "cuda:0"
+   }
+ }
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_9/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ebe966822a7d7ab598c2297141bf4b49dc853c6bc77860ed1836647951897765
+ size 67178280
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_9/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopKAdditivity",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0002,
+     "steps": 48828,
+     "seed": 0,
+     "activation_dim": 512,
+     "dict_size": 16384,
+     "k": 313,
+     "device": "cuda:0",
+     "layer": 4,
+     "lm_name": "EleutherAI/pythia-70m-deduped",
+     "wandb_name": "TopKTrainer_Additivity-EleutherAI/pythia-70m-deduped-resid_post_layer_4-add_coeff-0.0",
+     "submodule_name": "resid_post_layer_4"
+   },
+   "buffer": {
+     "d_submodule": 512,
+     "io": "out",
+     "n_ctxs": 30000,
+     "ctx_len": 128,
+     "refresh_batch_size": 32,
+     "out_batch_size": 2048,
+     "device": "cuda:0"
+   }
+ }
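Since the twelve configs form a small grid over k and add_coeff, a short loop can tabulate the sweep after the folder is downloaded; the local path and the parsing of add_coeff out of "wandb_name" are assumptions for illustration only.

import json
from pathlib import Path

root = Path("pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201")

# List each trainer's k and the add_coeff suffix encoded in its wandb_name.
for cfg_path in sorted(root.glob("trainer_*/config.json"),
                       key=lambda p: int(p.parent.name.split("_")[1])):
    trainer = json.loads(cfg_path.read_text())["trainer"]
    add_coeff = trainer["wandb_name"].rsplit("add_coeff-", 1)[-1]
    print(f"{cfg_path.parent.name}: k={trainer['k']}, add_coeff={add_coeff}")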