Upload folder using huggingface_hub
Browse files- pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_0/ae.pt +3 -0
- pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_0/config.json +26 -0
- pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_1/ae.pt +3 -0
- pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_1/config.json +26 -0
- pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_10/ae.pt +3 -0
- pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_10/config.json +26 -0
- pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_11/ae.pt +3 -0
- pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_11/config.json +26 -0
- pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_2/ae.pt +3 -0
- pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_2/config.json +26 -0
- pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_3/ae.pt +3 -0
- pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_3/config.json +26 -0
- pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_4/ae.pt +3 -0
- pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_4/config.json +26 -0
- pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_5/ae.pt +3 -0
- pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_5/config.json +26 -0
- pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_6/ae.pt +3 -0
- pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_6/config.json +26 -0
- pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_7/ae.pt +3 -0
- pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_7/config.json +26 -0
- pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_8/ae.pt +3 -0
- pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_8/config.json +26 -0
- pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_9/ae.pt +3 -0
- pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_9/config.json +26 -0
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_0/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1431c749ac52b47aff72000c918904485faf4b2a0ea86e51ebefb53fa087da12
|
3 |
+
size 67178280
|
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_0/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopKAdditivity",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": 48828,
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 512,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 20,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 4,
|
13 |
+
"lm_name": "EleutherAI/pythia-70m-deduped",
|
14 |
+
"wandb_name": "TopKTrainer_Additivity-EleutherAI/pythia-70m-deduped-resid_post_layer_4-add_coeff-0.0",
|
15 |
+
"submodule_name": "resid_post_layer_4"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 512,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 30000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 32,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_1/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:26ec0ec6ecae0b9f5c5752e9622bf4c57d25f598eb28ddc4bad475d168e195b7
|
3 |
+
size 67178280
|
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_1/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopKAdditivity",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": 48828,
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 512,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 20,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 4,
|
13 |
+
"lm_name": "EleutherAI/pythia-70m-deduped",
|
14 |
+
"wandb_name": "TopKTrainer_Additivity-EleutherAI/pythia-70m-deduped-resid_post_layer_4-add_coeff-0.01",
|
15 |
+
"submodule_name": "resid_post_layer_4"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 512,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 30000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 32,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_10/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2eb4ded88e254e23f6a988a32e191b1df9e29bc9cd28e106c5804ae4b2681fa2
|
3 |
+
size 67178280
|
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_10/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopKAdditivity",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": 48828,
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 512,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 313,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 4,
|
13 |
+
"lm_name": "EleutherAI/pythia-70m-deduped",
|
14 |
+
"wandb_name": "TopKTrainer_Additivity-EleutherAI/pythia-70m-deduped-resid_post_layer_4-add_coeff-0.01",
|
15 |
+
"submodule_name": "resid_post_layer_4"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 512,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 30000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 32,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_11/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d5f7f505c3a9632060446c6464cc4f75695c47675910bb43d8571f0d691d45e
|
3 |
+
size 67178280
|
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_11/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopKAdditivity",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": 48828,
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 512,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 313,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 4,
|
13 |
+
"lm_name": "EleutherAI/pythia-70m-deduped",
|
14 |
+
"wandb_name": "TopKTrainer_Additivity-EleutherAI/pythia-70m-deduped-resid_post_layer_4-add_coeff-1.0",
|
15 |
+
"submodule_name": "resid_post_layer_4"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 512,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 30000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 32,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_2/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3db0023ed5735a80a7d559b6177badb6d0474032f96035decd87ab70e14cc4df
|
3 |
+
size 67178280
|
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_2/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopKAdditivity",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": 48828,
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 512,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 20,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 4,
|
13 |
+
"lm_name": "EleutherAI/pythia-70m-deduped",
|
14 |
+
"wandb_name": "TopKTrainer_Additivity-EleutherAI/pythia-70m-deduped-resid_post_layer_4-add_coeff-1.0",
|
15 |
+
"submodule_name": "resid_post_layer_4"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 512,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 30000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 32,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_3/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5454fa4a88fd0ce731b732568c02b3dc63dcb2ef074a46444503a5ccd3a58830
|
3 |
+
size 67178280
|
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_3/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopKAdditivity",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": 48828,
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 512,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 50,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 4,
|
13 |
+
"lm_name": "EleutherAI/pythia-70m-deduped",
|
14 |
+
"wandb_name": "TopKTrainer_Additivity-EleutherAI/pythia-70m-deduped-resid_post_layer_4-add_coeff-0.0",
|
15 |
+
"submodule_name": "resid_post_layer_4"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 512,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 30000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 32,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_4/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f8567d82197833fd81ba2747106ca031dfa63ca3444f9379978302fb17733858
|
3 |
+
size 67178280
|
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_4/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopKAdditivity",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": 48828,
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 512,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 50,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 4,
|
13 |
+
"lm_name": "EleutherAI/pythia-70m-deduped",
|
14 |
+
"wandb_name": "TopKTrainer_Additivity-EleutherAI/pythia-70m-deduped-resid_post_layer_4-add_coeff-0.01",
|
15 |
+
"submodule_name": "resid_post_layer_4"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 512,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 30000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 32,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_5/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:794d05db23050977855b50d5001b42ecd4ce6d3531e706b68376a4b02e32b837
|
3 |
+
size 67178280
|
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_5/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopKAdditivity",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": 48828,
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 512,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 50,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 4,
|
13 |
+
"lm_name": "EleutherAI/pythia-70m-deduped",
|
14 |
+
"wandb_name": "TopKTrainer_Additivity-EleutherAI/pythia-70m-deduped-resid_post_layer_4-add_coeff-1.0",
|
15 |
+
"submodule_name": "resid_post_layer_4"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 512,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 30000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 32,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_6/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:090a325a2cd96ce816121a4191047e7026ae3c7cb8f04f077c989c8186849de5
|
3 |
+
size 67178280
|
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_6/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopKAdditivity",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": 48828,
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 512,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 125,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 4,
|
13 |
+
"lm_name": "EleutherAI/pythia-70m-deduped",
|
14 |
+
"wandb_name": "TopKTrainer_Additivity-EleutherAI/pythia-70m-deduped-resid_post_layer_4-add_coeff-0.0",
|
15 |
+
"submodule_name": "resid_post_layer_4"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 512,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 30000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 32,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_7/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:82fed8eefc30988862c5010101dc1eaa35c68e814b1b19435b77cdea9edae13e
|
3 |
+
size 67178280
|
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_7/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopKAdditivity",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": 48828,
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 512,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 125,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 4,
|
13 |
+
"lm_name": "EleutherAI/pythia-70m-deduped",
|
14 |
+
"wandb_name": "TopKTrainer_Additivity-EleutherAI/pythia-70m-deduped-resid_post_layer_4-add_coeff-0.01",
|
15 |
+
"submodule_name": "resid_post_layer_4"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 512,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 30000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 32,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_8/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:110b0b8d59c5b0fb50b10b11df2a16814130bf8c7fe9c61348f9c69f6d1b8847
|
3 |
+
size 67178280
|
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_8/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopKAdditivity",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": 48828,
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 512,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 125,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 4,
|
13 |
+
"lm_name": "EleutherAI/pythia-70m-deduped",
|
14 |
+
"wandb_name": "TopKTrainer_Additivity-EleutherAI/pythia-70m-deduped-resid_post_layer_4-add_coeff-1.0",
|
15 |
+
"submodule_name": "resid_post_layer_4"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 512,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 30000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 32,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_9/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ebe966822a7d7ab598c2297141bf4b49dc853c6bc77860ed1836647951897765
|
3 |
+
size 67178280
|
pythia-70m-deduped_layer-4_topk_width-2pow14_date-1201/trainer_9/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopKAdditivity",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": 48828,
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 512,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 313,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 4,
|
13 |
+
"lm_name": "EleutherAI/pythia-70m-deduped",
|
14 |
+
"wandb_name": "TopKTrainer_Additivity-EleutherAI/pythia-70m-deduped-resid_post_layer_4-add_coeff-0.0",
|
15 |
+
"submodule_name": "resid_post_layer_4"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 512,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 30000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 32,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|