Narmeen07 commited on
Commit
15854a5
·
verified ·
1 Parent(s): b8eb139

Upload folder using huggingface_hub

Browse files
Files changed (36) hide show
  1. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_1_seed0/ae.pt +3 -0
  2. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_1_seed0/config.json +37 -0
  3. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_1_seed0/eval_results.json +1 -0
  4. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_5_seed1/ae.pt +3 -0
  5. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_5_seed1/config.json +37 -0
  6. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_5_seed1/eval_results.json +1 -0
  7. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_9_seed2/ae.pt +3 -0
  8. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_9_seed2/config.json +37 -0
  9. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_9_seed2/eval_results.json +1 -0
  10. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_11_seed2/ae.pt +3 -0
  11. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_11_seed2/config.json +37 -0
  12. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_11_seed2/eval_results.json +1 -0
  13. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_3_seed0/ae.pt +3 -0
  14. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_3_seed0/config.json +37 -0
  15. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_3_seed0/eval_results.json +1 -0
  16. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_7_seed1/ae.pt +3 -0
  17. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_7_seed1/config.json +37 -0
  18. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_7_seed1/eval_results.json +1 -0
  19. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_0_seed0/ae.pt +3 -0
  20. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_0_seed0/config.json +37 -0
  21. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_0_seed0/eval_results.json +1 -0
  22. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_4_seed1/ae.pt +3 -0
  23. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_4_seed1/config.json +37 -0
  24. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_4_seed1/eval_results.json +1 -0
  25. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_8_seed2/ae.pt +3 -0
  26. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_8_seed2/config.json +37 -0
  27. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_8_seed2/eval_results.json +1 -0
  28. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_10_seed2/ae.pt +3 -0
  29. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_10_seed2/config.json +37 -0
  30. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_10_seed2/eval_results.json +1 -0
  31. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_2_seed0/ae.pt +3 -0
  32. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_2_seed0/config.json +37 -0
  33. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_2_seed0/eval_results.json +1 -0
  34. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_6_seed1/ae.pt +3 -0
  35. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_6_seed1/config.json +37 -0
  36. ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_6_seed1/eval_results.json +1 -0
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_1_seed0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3dbe79bbbf06e8f8076371715cb9df670a18d4b908cca52664b3971b6ec1a2f
3
+ size 403183898
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_1_seed0/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "HybridSampledTopKTrainer",
4
+ "dict_class": "HybridSampledTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 12207,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 9765,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "sampling_update_freq": 1,
13
+ "sampling_method": "entropy",
14
+ "ridge_lambda": 0.01,
15
+ "sketching_size": 100,
16
+ "top_k_aux": 384,
17
+ "seed": 0,
18
+ "activation_dim": 768,
19
+ "dict_size": 65536,
20
+ "k": 60,
21
+ "device": "cuda:0",
22
+ "layer": 6,
23
+ "lm_name": "EleutherAI/pythia-160m-deduped",
24
+ "wandb_name": "HybridSampledTopKTrainer-entropy-EleutherAI/pythia-160m-deduped-resid_post_layer_6_trainer_1",
25
+ "submodule_name": "resid_post_layer_6",
26
+ "l_multiplier": 3.0
27
+ },
28
+ "buffer": {
29
+ "d_submodule": 768,
30
+ "io": "out",
31
+ "n_ctxs": 244,
32
+ "ctx_len": 1024,
33
+ "refresh_batch_size": 32,
34
+ "out_batch_size": 2048,
35
+ "device": "cuda:0"
36
+ }
37
+ }
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_1_seed0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 9.474773320284756, "l1_loss": 151.43422305945194, "l0": 60.0, "frac_variance_explained": 0.8375380797819658, "cossim": 0.8384932984005321, "l2_ratio": 0.8707980658068801, "relative_reconstruction_bias": 1.0168493335897273, "loss_original": 2.6030195698593603, "loss_reconstructed": 5.313630768747041, "loss_zero": 11.09529752442331, "frac_recovered": 0.6520047928347732, "frac_alive": 0.006378173828125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_5_seed1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:914c982bdf38946538eb331f9898e36ed39a5bfbe64f98109a7663a5535dcc76
3
+ size 403183898
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_5_seed1/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "HybridSampledTopKTrainer",
4
+ "dict_class": "HybridSampledTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 12207,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 9765,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "sampling_update_freq": 1,
13
+ "sampling_method": "entropy",
14
+ "ridge_lambda": 0.01,
15
+ "sketching_size": 100,
16
+ "top_k_aux": 384,
17
+ "seed": 1,
18
+ "activation_dim": 768,
19
+ "dict_size": 65536,
20
+ "k": 60,
21
+ "device": "cuda:0",
22
+ "layer": 6,
23
+ "lm_name": "EleutherAI/pythia-160m-deduped",
24
+ "wandb_name": "HybridSampledTopKTrainer-entropy-EleutherAI/pythia-160m-deduped-resid_post_layer_6_trainer_5",
25
+ "submodule_name": "resid_post_layer_6",
26
+ "l_multiplier": 3.0
27
+ },
28
+ "buffer": {
29
+ "d_submodule": 768,
30
+ "io": "out",
31
+ "n_ctxs": 244,
32
+ "ctx_len": 1024,
33
+ "refresh_batch_size": 32,
34
+ "out_batch_size": 2048,
35
+ "device": "cuda:0"
36
+ }
37
+ }
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_5_seed1/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 9.408500469092166, "l1_loss": 151.51055214621803, "l0": 60.0, "frac_variance_explained": 0.8405444116303415, "cossim": 0.8407332084395669, "l2_ratio": 0.8666513526078427, "relative_reconstruction_bias": 1.0147784514860674, "loss_original": 2.6030195698593603, "loss_reconstructed": 5.262264193910541, "loss_zero": 11.09529752442331, "frac_recovered": 0.6663670955282269, "frac_alive": 0.0065460205078125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_9_seed2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd63f3783cf02e274427f9c73ce8a5f00f77939188428912ba377c35ec3d7292
3
+ size 403183898
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_9_seed2/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "HybridSampledTopKTrainer",
4
+ "dict_class": "HybridSampledTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 12207,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 9765,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "sampling_update_freq": 1,
13
+ "sampling_method": "entropy",
14
+ "ridge_lambda": 0.01,
15
+ "sketching_size": 100,
16
+ "top_k_aux": 384,
17
+ "seed": 2,
18
+ "activation_dim": 768,
19
+ "dict_size": 65536,
20
+ "k": 60,
21
+ "device": "cuda:0",
22
+ "layer": 6,
23
+ "lm_name": "EleutherAI/pythia-160m-deduped",
24
+ "wandb_name": "HybridSampledTopKTrainer-entropy-EleutherAI/pythia-160m-deduped-resid_post_layer_6_trainer_9",
25
+ "submodule_name": "resid_post_layer_6",
26
+ "l_multiplier": 3.0
27
+ },
28
+ "buffer": {
29
+ "d_submodule": 768,
30
+ "io": "out",
31
+ "n_ctxs": 244,
32
+ "ctx_len": 1024,
33
+ "refresh_batch_size": 32,
34
+ "out_batch_size": 2048,
35
+ "device": "cuda:0"
36
+ }
37
+ }
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_9_seed2/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 9.324038881244082, "l1_loss": 157.34996911251184, "l0": 60.0, "frac_variance_explained": 0.8403036377646707, "cossim": 0.8437799106944691, "l2_ratio": 0.8656452587156584, "relative_reconstruction_bias": 1.0134581724802654, "loss_original": 2.6030195698593603, "loss_reconstructed": 5.360622145912864, "loss_zero": 11.09529752442331, "frac_recovered": 0.6453730193051425, "frac_alive": 0.006134033203125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_11_seed2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7304d66638056f0e5eaa4f67c6d74a80103b5cac276a9e5197720eb868c20760
3
+ size 403183898
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_11_seed2/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "HybridSampledTopKTrainer",
4
+ "dict_class": "HybridSampledTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 12207,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 9765,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "sampling_update_freq": 1,
13
+ "sampling_method": "l2_norm",
14
+ "ridge_lambda": 0.01,
15
+ "sketching_size": 100,
16
+ "top_k_aux": 384,
17
+ "seed": 2,
18
+ "activation_dim": 768,
19
+ "dict_size": 65536,
20
+ "k": 60,
21
+ "device": "cuda:0",
22
+ "layer": 6,
23
+ "lm_name": "EleutherAI/pythia-160m-deduped",
24
+ "wandb_name": "HybridSampledTopKTrainer-l2_norm-EleutherAI/pythia-160m-deduped-resid_post_layer_6_trainer_11",
25
+ "submodule_name": "resid_post_layer_6",
26
+ "l_multiplier": 3.0
27
+ },
28
+ "buffer": {
29
+ "d_submodule": 768,
30
+ "io": "out",
31
+ "n_ctxs": 244,
32
+ "ctx_len": 1024,
33
+ "refresh_batch_size": 32,
34
+ "out_batch_size": 2048,
35
+ "device": "cuda:0"
36
+ }
37
+ }
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_11_seed2/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 8.444165403192693, "l1_loss": 121.41419936671402, "l0": 60.0, "frac_variance_explained": 0.8684274463942556, "cossim": 0.8714822946172772, "l2_ratio": 0.8715381315260222, "relative_reconstruction_bias": 0.9984932162544944, "loss_original": 2.6030195698593603, "loss_reconstructed": 5.118368091005268, "loss_zero": 11.09529752442331, "frac_recovered": 0.6774183656230117, "frac_alive": 0.0027923583984375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_3_seed0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eedc137947ccfef968576451ef15407edda72539ee0ea4d5b04ae50db1d1a6b3
3
+ size 403183898
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_3_seed0/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "HybridSampledTopKTrainer",
4
+ "dict_class": "HybridSampledTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 12207,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 9765,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "sampling_update_freq": 1,
13
+ "sampling_method": "l2_norm",
14
+ "ridge_lambda": 0.01,
15
+ "sketching_size": 100,
16
+ "top_k_aux": 384,
17
+ "seed": 0,
18
+ "activation_dim": 768,
19
+ "dict_size": 65536,
20
+ "k": 60,
21
+ "device": "cuda:0",
22
+ "layer": 6,
23
+ "lm_name": "EleutherAI/pythia-160m-deduped",
24
+ "wandb_name": "HybridSampledTopKTrainer-l2_norm-EleutherAI/pythia-160m-deduped-resid_post_layer_6_trainer_3",
25
+ "submodule_name": "resid_post_layer_6",
26
+ "l_multiplier": 3.0
27
+ },
28
+ "buffer": {
29
+ "d_submodule": 768,
30
+ "io": "out",
31
+ "n_ctxs": 244,
32
+ "ctx_len": 1024,
33
+ "refresh_batch_size": 32,
34
+ "out_batch_size": 2048,
35
+ "device": "cuda:0"
36
+ }
37
+ }
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_3_seed0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 8.443496444008566, "l1_loss": 125.03681090383819, "l0": 60.0, "frac_variance_explained": 0.8674000429384636, "cossim": 0.8712869611653414, "l2_ratio": 0.8714855367487128, "relative_reconstruction_bias": 0.9995003360690493, "loss_original": 2.6030195698593603, "loss_reconstructed": 4.966185020677971, "loss_zero": 11.09529752442331, "frac_recovered": 0.6983266219948278, "frac_alive": 0.0028076171875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_7_seed1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d30e540f7e19800970cd2bb8c77f5f1df299017d9831bc9901df9e049b762802
3
+ size 403183898
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_7_seed1/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "HybridSampledTopKTrainer",
4
+ "dict_class": "HybridSampledTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 12207,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 9765,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "sampling_update_freq": 1,
13
+ "sampling_method": "l2_norm",
14
+ "ridge_lambda": 0.01,
15
+ "sketching_size": 100,
16
+ "top_k_aux": 384,
17
+ "seed": 1,
18
+ "activation_dim": 768,
19
+ "dict_size": 65536,
20
+ "k": 60,
21
+ "device": "cuda:0",
22
+ "layer": 6,
23
+ "lm_name": "EleutherAI/pythia-160m-deduped",
24
+ "wandb_name": "HybridSampledTopKTrainer-l2_norm-EleutherAI/pythia-160m-deduped-resid_post_layer_6_trainer_7",
25
+ "submodule_name": "resid_post_layer_6",
26
+ "l_multiplier": 3.0
27
+ },
28
+ "buffer": {
29
+ "d_submodule": 768,
30
+ "io": "out",
31
+ "n_ctxs": 244,
32
+ "ctx_len": 1024,
33
+ "refresh_batch_size": 32,
34
+ "out_batch_size": 2048,
35
+ "device": "cuda:0"
36
+ }
37
+ }
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_7_seed1/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 8.431988629427822, "l1_loss": 119.58685487689394, "l0": 60.0, "frac_variance_explained": 0.8706758040370364, "cossim": 0.8717323650013317, "l2_ratio": 0.8721790367906744, "relative_reconstruction_bias": 0.9994440403851595, "loss_original": 2.6030195698593603, "loss_reconstructed": 5.004976113637288, "loss_zero": 11.09529752442331, "frac_recovered": 0.6917261535471136, "frac_alive": 0.0028076171875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_0_seed0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05d111b079974378cc01a05257b1d129eebfa405711a51aee9ef5884dfcb822e
3
+ size 403183898
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_0_seed0/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "HybridSampledTopKTrainer",
4
+ "dict_class": "HybridSampledTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 12207,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 9765,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "sampling_update_freq": 1,
13
+ "sampling_method": "leverage",
14
+ "ridge_lambda": 0.01,
15
+ "sketching_size": 100,
16
+ "top_k_aux": 384,
17
+ "seed": 0,
18
+ "activation_dim": 768,
19
+ "dict_size": 65536,
20
+ "k": 60,
21
+ "device": "cuda:0",
22
+ "layer": 6,
23
+ "lm_name": "EleutherAI/pythia-160m-deduped",
24
+ "wandb_name": "HybridSampledTopKTrainer-leverage-EleutherAI/pythia-160m-deduped-resid_post_layer_6_trainer_0",
25
+ "submodule_name": "resid_post_layer_6",
26
+ "l_multiplier": 3.0
27
+ },
28
+ "buffer": {
29
+ "d_submodule": 768,
30
+ "io": "out",
31
+ "n_ctxs": 244,
32
+ "ctx_len": 1024,
33
+ "refresh_batch_size": 32,
34
+ "out_batch_size": 2048,
35
+ "device": "cuda:0"
36
+ }
37
+ }
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_0_seed0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 8.434096278566303, "l1_loss": 122.03498655377012, "l0": 60.0, "frac_variance_explained": 0.8695206931143096, "cossim": 0.8717283985831521, "l2_ratio": 0.8716153412154226, "relative_reconstruction_bias": 1.0004283731633967, "loss_original": 2.6030195698593603, "loss_reconstructed": 5.244064215457801, "loss_zero": 11.09529752442331, "frac_recovered": 0.6713318048101483, "frac_alive": 0.002777099609375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_4_seed1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:447f96fb0647ae6603aae4867300adb8f7820b9975d778ae447d4e15f7ca45ac
3
+ size 403183898
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_4_seed1/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "HybridSampledTopKTrainer",
4
+ "dict_class": "HybridSampledTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 12207,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 9765,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "sampling_update_freq": 1,
13
+ "sampling_method": "leverage",
14
+ "ridge_lambda": 0.01,
15
+ "sketching_size": 100,
16
+ "top_k_aux": 384,
17
+ "seed": 1,
18
+ "activation_dim": 768,
19
+ "dict_size": 65536,
20
+ "k": 60,
21
+ "device": "cuda:0",
22
+ "layer": 6,
23
+ "lm_name": "EleutherAI/pythia-160m-deduped",
24
+ "wandb_name": "HybridSampledTopKTrainer-leverage-EleutherAI/pythia-160m-deduped-resid_post_layer_6_trainer_4",
25
+ "submodule_name": "resid_post_layer_6",
26
+ "l_multiplier": 3.0
27
+ },
28
+ "buffer": {
29
+ "d_submodule": 768,
30
+ "io": "out",
31
+ "n_ctxs": 244,
32
+ "ctx_len": 1024,
33
+ "refresh_batch_size": 32,
34
+ "out_batch_size": 2048,
35
+ "device": "cuda:0"
36
+ }
37
+ }
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_4_seed1/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 8.432261698173754, "l1_loss": 119.4970097397313, "l0": 60.0, "frac_variance_explained": 0.8702907815124049, "cossim": 0.8717991763895209, "l2_ratio": 0.8722440781015338, "relative_reconstruction_bias": 0.9994456984780051, "loss_original": 2.6030195698593603, "loss_reconstructed": 5.004976113637288, "loss_zero": 11.09529752442331, "frac_recovered": 0.6917261535471136, "frac_alive": 0.002838134765625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_8_seed2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fef9b961427fe1bea11bcee163e102e880a793800e4dd3b93c846ab214d9283
3
+ size 403183898
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_8_seed2/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "HybridSampledTopKTrainer",
4
+ "dict_class": "HybridSampledTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 12207,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 9765,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "sampling_update_freq": 1,
13
+ "sampling_method": "leverage",
14
+ "ridge_lambda": 0.01,
15
+ "sketching_size": 100,
16
+ "top_k_aux": 384,
17
+ "seed": 2,
18
+ "activation_dim": 768,
19
+ "dict_size": 65536,
20
+ "k": 60,
21
+ "device": "cuda:0",
22
+ "layer": 6,
23
+ "lm_name": "EleutherAI/pythia-160m-deduped",
24
+ "wandb_name": "HybridSampledTopKTrainer-leverage-EleutherAI/pythia-160m-deduped-resid_post_layer_6_trainer_8",
25
+ "submodule_name": "resid_post_layer_6",
26
+ "l_multiplier": 3.0
27
+ },
28
+ "buffer": {
29
+ "d_submodule": 768,
30
+ "io": "out",
31
+ "n_ctxs": 244,
32
+ "ctx_len": 1024,
33
+ "refresh_batch_size": 32,
34
+ "out_batch_size": 2048,
35
+ "device": "cuda:0"
36
+ }
37
+ }
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_8_seed2/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 8.441039461078066, "l1_loss": 121.47962581750119, "l0": 60.0, "frac_variance_explained": 0.870531931067958, "cossim": 0.8715875419703397, "l2_ratio": 0.871613925153559, "relative_reconstruction_bias": 0.9984585823434772, "loss_original": 2.6030195698593603, "loss_reconstructed": 5.118368091005268, "loss_zero": 11.09529752442331, "frac_recovered": 0.6774183656230117, "frac_alive": 0.0028228759765625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_10_seed2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d5c6f4484cd1f28f0bc36ec467e049f992fef32cbb8f62818735fb4f31e7341
3
+ size 403183898
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_10_seed2/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "HybridSampledTopKTrainer",
4
+ "dict_class": "HybridSampledTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 12207,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 9765,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "sampling_update_freq": 1,
13
+ "sampling_method": "uniform",
14
+ "ridge_lambda": 0.01,
15
+ "sketching_size": 100,
16
+ "top_k_aux": 384,
17
+ "seed": 2,
18
+ "activation_dim": 768,
19
+ "dict_size": 65536,
20
+ "k": 60,
21
+ "device": "cuda:0",
22
+ "layer": 6,
23
+ "lm_name": "EleutherAI/pythia-160m-deduped",
24
+ "wandb_name": "HybridSampledTopKTrainer-uniform-EleutherAI/pythia-160m-deduped-resid_post_layer_6_trainer_10",
25
+ "submodule_name": "resid_post_layer_6",
26
+ "l_multiplier": 3.0
27
+ },
28
+ "buffer": {
29
+ "d_submodule": 768,
30
+ "io": "out",
31
+ "n_ctxs": 244,
32
+ "ctx_len": 1024,
33
+ "refresh_batch_size": 32,
34
+ "out_batch_size": 2048,
35
+ "device": "cuda:0"
36
+ }
37
+ }
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_10_seed2/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 8.463962757226193, "l1_loss": 130.3787534309156, "l0": 60.0, "frac_variance_explained": 0.86707542701201, "cossim": 0.870789596528718, "l2_ratio": 0.8709013696872827, "relative_reconstruction_bias": 0.99878458182017, "loss_original": 2.6030195698593603, "loss_reconstructed": 4.404964736013701, "loss_zero": 11.09529752442331, "frac_recovered": 0.7763142224514124, "frac_alive": 0.00274658203125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_2_seed0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f43cc97c1c381ac656f198ea181e4d4924bdd6f91288bb6eb8e71326fcc45aee
3
+ size 403183898
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_2_seed0/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "HybridSampledTopKTrainer",
4
+ "dict_class": "HybridSampledTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 12207,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 9765,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "sampling_update_freq": 1,
13
+ "sampling_method": "uniform",
14
+ "ridge_lambda": 0.01,
15
+ "sketching_size": 100,
16
+ "top_k_aux": 384,
17
+ "seed": 0,
18
+ "activation_dim": 768,
19
+ "dict_size": 65536,
20
+ "k": 60,
21
+ "device": "cuda:0",
22
+ "layer": 6,
23
+ "lm_name": "EleutherAI/pythia-160m-deduped",
24
+ "wandb_name": "HybridSampledTopKTrainer-uniform-EleutherAI/pythia-160m-deduped-resid_post_layer_6_trainer_2",
25
+ "submodule_name": "resid_post_layer_6",
26
+ "l_multiplier": 3.0
27
+ },
28
+ "buffer": {
29
+ "d_submodule": 768,
30
+ "io": "out",
31
+ "n_ctxs": 244,
32
+ "ctx_len": 1024,
33
+ "refresh_batch_size": 32,
34
+ "out_batch_size": 2048,
35
+ "device": "cuda:0"
36
+ }
37
+ }
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_2_seed0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 8.452134623672023, "l1_loss": 122.55520259972775, "l0": 60.0, "frac_variance_explained": 0.8694721062978109, "cossim": 0.871168497836951, "l2_ratio": 0.8718094103264086, "relative_reconstruction_bias": 1.001582398559108, "loss_original": 2.6030195698593603, "loss_reconstructed": 4.402381441809914, "loss_zero": 11.09529752442331, "frac_recovered": 0.7762271111661737, "frac_alive": 0.00274658203125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_6_seed1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:808d33e8472d9704f12c85fa27a9c4207bc647bd39828baa05e5066d9288b53e
3
+ size 403183898
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_6_seed1/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "HybridSampledTopKTrainer",
4
+ "dict_class": "HybridSampledTopKSAE",
5
+ "lr": 0.0003,
6
+ "steps": 12207,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 9765,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "sampling_update_freq": 1,
13
+ "sampling_method": "uniform",
14
+ "ridge_lambda": 0.01,
15
+ "sketching_size": 100,
16
+ "top_k_aux": 384,
17
+ "seed": 1,
18
+ "activation_dim": 768,
19
+ "dict_size": 65536,
20
+ "k": 60,
21
+ "device": "cuda:0",
22
+ "layer": 6,
23
+ "lm_name": "EleutherAI/pythia-160m-deduped",
24
+ "wandb_name": "HybridSampledTopKTrainer-uniform-EleutherAI/pythia-160m-deduped-resid_post_layer_6_trainer_6",
25
+ "submodule_name": "resid_post_layer_6",
26
+ "l_multiplier": 3.0
27
+ },
28
+ "buffer": {
29
+ "d_submodule": 768,
30
+ "io": "out",
31
+ "n_ctxs": 244,
32
+ "ctx_len": 1024,
33
+ "refresh_batch_size": 32,
34
+ "out_batch_size": 2048,
35
+ "device": "cuda:0"
36
+ }
37
+ }
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_6_seed1/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 8.463646339647697, "l1_loss": 130.58196420380563, "l0": 60.0, "frac_variance_explained": 0.8669460072661891, "cossim": 0.8709347663503705, "l2_ratio": 0.8710685751654885, "relative_reconstruction_bias": 1.0013282949274236, "loss_original": 2.6030195698593603, "loss_reconstructed": 4.356847734162302, "loss_zero": 11.09529752442331, "frac_recovered": 0.7823429197976084, "frac_alive": 0.00274658203125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}