Upload folder using huggingface_hub
Browse files- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_1_seed0/ae.pt +3 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_1_seed0/config.json +37 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_1_seed0/eval_results.json +1 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_5_seed1/ae.pt +3 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_5_seed1/config.json +37 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_5_seed1/eval_results.json +1 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_9_seed2/ae.pt +3 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_9_seed2/config.json +37 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_9_seed2/eval_results.json +1 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_11_seed2/ae.pt +3 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_11_seed2/config.json +37 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_11_seed2/eval_results.json +1 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_3_seed0/ae.pt +3 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_3_seed0/config.json +37 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_3_seed0/eval_results.json +1 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_7_seed1/ae.pt +3 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_7_seed1/config.json +37 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_7_seed1/eval_results.json +1 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_0_seed0/ae.pt +3 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_0_seed0/config.json +37 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_0_seed0/eval_results.json +1 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_4_seed1/ae.pt +3 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_4_seed1/config.json +37 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_4_seed1/eval_results.json +1 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_8_seed2/ae.pt +3 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_8_seed2/config.json +37 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_8_seed2/eval_results.json +1 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_10_seed2/ae.pt +3 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_10_seed2/config.json +37 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_10_seed2/eval_results.json +1 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_2_seed0/ae.pt +3 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_2_seed0/config.json +37 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_2_seed0/eval_results.json +1 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_6_seed1/ae.pt +3 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_6_seed1/config.json +37 -0
- ._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_6_seed1/eval_results.json +1 -0
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_1_seed0/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b3dbe79bbbf06e8f8076371715cb9df670a18d4b908cca52664b3971b6ec1a2f
|
| 3 |
+
size 403183898
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_1_seed0/config.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "HybridSampledTopKTrainer",
|
| 4 |
+
"dict_class": "HybridSampledTopKSAE",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 12207,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 9765,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"sampling_update_freq": 1,
|
| 13 |
+
"sampling_method": "entropy",
|
| 14 |
+
"ridge_lambda": 0.01,
|
| 15 |
+
"sketching_size": 100,
|
| 16 |
+
"top_k_aux": 384,
|
| 17 |
+
"seed": 0,
|
| 18 |
+
"activation_dim": 768,
|
| 19 |
+
"dict_size": 65536,
|
| 20 |
+
"k": 60,
|
| 21 |
+
"device": "cuda:0",
|
| 22 |
+
"layer": 6,
|
| 23 |
+
"lm_name": "EleutherAI/pythia-160m-deduped",
|
| 24 |
+
"wandb_name": "HybridSampledTopKTrainer-entropy-EleutherAI/pythia-160m-deduped-resid_post_layer_6_trainer_1",
|
| 25 |
+
"submodule_name": "resid_post_layer_6",
|
| 26 |
+
"l_multiplier": 3.0
|
| 27 |
+
},
|
| 28 |
+
"buffer": {
|
| 29 |
+
"d_submodule": 768,
|
| 30 |
+
"io": "out",
|
| 31 |
+
"n_ctxs": 244,
|
| 32 |
+
"ctx_len": 1024,
|
| 33 |
+
"refresh_batch_size": 32,
|
| 34 |
+
"out_batch_size": 2048,
|
| 35 |
+
"device": "cuda:0"
|
| 36 |
+
}
|
| 37 |
+
}
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_1_seed0/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 9.474773320284756, "l1_loss": 151.43422305945194, "l0": 60.0, "frac_variance_explained": 0.8375380797819658, "cossim": 0.8384932984005321, "l2_ratio": 0.8707980658068801, "relative_reconstruction_bias": 1.0168493335897273, "loss_original": 2.6030195698593603, "loss_reconstructed": 5.313630768747041, "loss_zero": 11.09529752442331, "frac_recovered": 0.6520047928347732, "frac_alive": 0.006378173828125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_5_seed1/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:914c982bdf38946538eb331f9898e36ed39a5bfbe64f98109a7663a5535dcc76
|
| 3 |
+
size 403183898
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_5_seed1/config.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "HybridSampledTopKTrainer",
|
| 4 |
+
"dict_class": "HybridSampledTopKSAE",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 12207,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 9765,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"sampling_update_freq": 1,
|
| 13 |
+
"sampling_method": "entropy",
|
| 14 |
+
"ridge_lambda": 0.01,
|
| 15 |
+
"sketching_size": 100,
|
| 16 |
+
"top_k_aux": 384,
|
| 17 |
+
"seed": 1,
|
| 18 |
+
"activation_dim": 768,
|
| 19 |
+
"dict_size": 65536,
|
| 20 |
+
"k": 60,
|
| 21 |
+
"device": "cuda:0",
|
| 22 |
+
"layer": 6,
|
| 23 |
+
"lm_name": "EleutherAI/pythia-160m-deduped",
|
| 24 |
+
"wandb_name": "HybridSampledTopKTrainer-entropy-EleutherAI/pythia-160m-deduped-resid_post_layer_6_trainer_5",
|
| 25 |
+
"submodule_name": "resid_post_layer_6",
|
| 26 |
+
"l_multiplier": 3.0
|
| 27 |
+
},
|
| 28 |
+
"buffer": {
|
| 29 |
+
"d_submodule": 768,
|
| 30 |
+
"io": "out",
|
| 31 |
+
"n_ctxs": 244,
|
| 32 |
+
"ctx_len": 1024,
|
| 33 |
+
"refresh_batch_size": 32,
|
| 34 |
+
"out_batch_size": 2048,
|
| 35 |
+
"device": "cuda:0"
|
| 36 |
+
}
|
| 37 |
+
}
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_5_seed1/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 9.408500469092166, "l1_loss": 151.51055214621803, "l0": 60.0, "frac_variance_explained": 0.8405444116303415, "cossim": 0.8407332084395669, "l2_ratio": 0.8666513526078427, "relative_reconstruction_bias": 1.0147784514860674, "loss_original": 2.6030195698593603, "loss_reconstructed": 5.262264193910541, "loss_zero": 11.09529752442331, "frac_recovered": 0.6663670955282269, "frac_alive": 0.0065460205078125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_9_seed2/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fd63f3783cf02e274427f9c73ce8a5f00f77939188428912ba377c35ec3d7292
|
| 3 |
+
size 403183898
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_9_seed2/config.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "HybridSampledTopKTrainer",
|
| 4 |
+
"dict_class": "HybridSampledTopKSAE",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 12207,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 9765,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"sampling_update_freq": 1,
|
| 13 |
+
"sampling_method": "entropy",
|
| 14 |
+
"ridge_lambda": 0.01,
|
| 15 |
+
"sketching_size": 100,
|
| 16 |
+
"top_k_aux": 384,
|
| 17 |
+
"seed": 2,
|
| 18 |
+
"activation_dim": 768,
|
| 19 |
+
"dict_size": 65536,
|
| 20 |
+
"k": 60,
|
| 21 |
+
"device": "cuda:0",
|
| 22 |
+
"layer": 6,
|
| 23 |
+
"lm_name": "EleutherAI/pythia-160m-deduped",
|
| 24 |
+
"wandb_name": "HybridSampledTopKTrainer-entropy-EleutherAI/pythia-160m-deduped-resid_post_layer_6_trainer_9",
|
| 25 |
+
"submodule_name": "resid_post_layer_6",
|
| 26 |
+
"l_multiplier": 3.0
|
| 27 |
+
},
|
| 28 |
+
"buffer": {
|
| 29 |
+
"d_submodule": 768,
|
| 30 |
+
"io": "out",
|
| 31 |
+
"n_ctxs": 244,
|
| 32 |
+
"ctx_len": 1024,
|
| 33 |
+
"refresh_batch_size": 32,
|
| 34 |
+
"out_batch_size": 2048,
|
| 35 |
+
"device": "cuda:0"
|
| 36 |
+
}
|
| 37 |
+
}
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-entropy-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_9_seed2/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 9.324038881244082, "l1_loss": 157.34996911251184, "l0": 60.0, "frac_variance_explained": 0.8403036377646707, "cossim": 0.8437799106944691, "l2_ratio": 0.8656452587156584, "relative_reconstruction_bias": 1.0134581724802654, "loss_original": 2.6030195698593603, "loss_reconstructed": 5.360622145912864, "loss_zero": 11.09529752442331, "frac_recovered": 0.6453730193051425, "frac_alive": 0.006134033203125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_11_seed2/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7304d66638056f0e5eaa4f67c6d74a80103b5cac276a9e5197720eb868c20760
|
| 3 |
+
size 403183898
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_11_seed2/config.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "HybridSampledTopKTrainer",
|
| 4 |
+
"dict_class": "HybridSampledTopKSAE",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 12207,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 9765,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"sampling_update_freq": 1,
|
| 13 |
+
"sampling_method": "l2_norm",
|
| 14 |
+
"ridge_lambda": 0.01,
|
| 15 |
+
"sketching_size": 100,
|
| 16 |
+
"top_k_aux": 384,
|
| 17 |
+
"seed": 2,
|
| 18 |
+
"activation_dim": 768,
|
| 19 |
+
"dict_size": 65536,
|
| 20 |
+
"k": 60,
|
| 21 |
+
"device": "cuda:0",
|
| 22 |
+
"layer": 6,
|
| 23 |
+
"lm_name": "EleutherAI/pythia-160m-deduped",
|
| 24 |
+
"wandb_name": "HybridSampledTopKTrainer-l2_norm-EleutherAI/pythia-160m-deduped-resid_post_layer_6_trainer_11",
|
| 25 |
+
"submodule_name": "resid_post_layer_6",
|
| 26 |
+
"l_multiplier": 3.0
|
| 27 |
+
},
|
| 28 |
+
"buffer": {
|
| 29 |
+
"d_submodule": 768,
|
| 30 |
+
"io": "out",
|
| 31 |
+
"n_ctxs": 244,
|
| 32 |
+
"ctx_len": 1024,
|
| 33 |
+
"refresh_batch_size": 32,
|
| 34 |
+
"out_batch_size": 2048,
|
| 35 |
+
"device": "cuda:0"
|
| 36 |
+
}
|
| 37 |
+
}
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_11_seed2/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 8.444165403192693, "l1_loss": 121.41419936671402, "l0": 60.0, "frac_variance_explained": 0.8684274463942556, "cossim": 0.8714822946172772, "l2_ratio": 0.8715381315260222, "relative_reconstruction_bias": 0.9984932162544944, "loss_original": 2.6030195698593603, "loss_reconstructed": 5.118368091005268, "loss_zero": 11.09529752442331, "frac_recovered": 0.6774183656230117, "frac_alive": 0.0027923583984375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_3_seed0/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eedc137947ccfef968576451ef15407edda72539ee0ea4d5b04ae50db1d1a6b3
|
| 3 |
+
size 403183898
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_3_seed0/config.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "HybridSampledTopKTrainer",
|
| 4 |
+
"dict_class": "HybridSampledTopKSAE",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 12207,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 9765,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"sampling_update_freq": 1,
|
| 13 |
+
"sampling_method": "l2_norm",
|
| 14 |
+
"ridge_lambda": 0.01,
|
| 15 |
+
"sketching_size": 100,
|
| 16 |
+
"top_k_aux": 384,
|
| 17 |
+
"seed": 0,
|
| 18 |
+
"activation_dim": 768,
|
| 19 |
+
"dict_size": 65536,
|
| 20 |
+
"k": 60,
|
| 21 |
+
"device": "cuda:0",
|
| 22 |
+
"layer": 6,
|
| 23 |
+
"lm_name": "EleutherAI/pythia-160m-deduped",
|
| 24 |
+
"wandb_name": "HybridSampledTopKTrainer-l2_norm-EleutherAI/pythia-160m-deduped-resid_post_layer_6_trainer_3",
|
| 25 |
+
"submodule_name": "resid_post_layer_6",
|
| 26 |
+
"l_multiplier": 3.0
|
| 27 |
+
},
|
| 28 |
+
"buffer": {
|
| 29 |
+
"d_submodule": 768,
|
| 30 |
+
"io": "out",
|
| 31 |
+
"n_ctxs": 244,
|
| 32 |
+
"ctx_len": 1024,
|
| 33 |
+
"refresh_batch_size": 32,
|
| 34 |
+
"out_batch_size": 2048,
|
| 35 |
+
"device": "cuda:0"
|
| 36 |
+
}
|
| 37 |
+
}
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_3_seed0/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 8.443496444008566, "l1_loss": 125.03681090383819, "l0": 60.0, "frac_variance_explained": 0.8674000429384636, "cossim": 0.8712869611653414, "l2_ratio": 0.8714855367487128, "relative_reconstruction_bias": 0.9995003360690493, "loss_original": 2.6030195698593603, "loss_reconstructed": 4.966185020677971, "loss_zero": 11.09529752442331, "frac_recovered": 0.6983266219948278, "frac_alive": 0.0028076171875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_7_seed1/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d30e540f7e19800970cd2bb8c77f5f1df299017d9831bc9901df9e049b762802
|
| 3 |
+
size 403183898
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_7_seed1/config.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "HybridSampledTopKTrainer",
|
| 4 |
+
"dict_class": "HybridSampledTopKSAE",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 12207,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 9765,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"sampling_update_freq": 1,
|
| 13 |
+
"sampling_method": "l2_norm",
|
| 14 |
+
"ridge_lambda": 0.01,
|
| 15 |
+
"sketching_size": 100,
|
| 16 |
+
"top_k_aux": 384,
|
| 17 |
+
"seed": 1,
|
| 18 |
+
"activation_dim": 768,
|
| 19 |
+
"dict_size": 65536,
|
| 20 |
+
"k": 60,
|
| 21 |
+
"device": "cuda:0",
|
| 22 |
+
"layer": 6,
|
| 23 |
+
"lm_name": "EleutherAI/pythia-160m-deduped",
|
| 24 |
+
"wandb_name": "HybridSampledTopKTrainer-l2_norm-EleutherAI/pythia-160m-deduped-resid_post_layer_6_trainer_7",
|
| 25 |
+
"submodule_name": "resid_post_layer_6",
|
| 26 |
+
"l_multiplier": 3.0
|
| 27 |
+
},
|
| 28 |
+
"buffer": {
|
| 29 |
+
"d_submodule": 768,
|
| 30 |
+
"io": "out",
|
| 31 |
+
"n_ctxs": 244,
|
| 32 |
+
"ctx_len": 1024,
|
| 33 |
+
"refresh_batch_size": 32,
|
| 34 |
+
"out_batch_size": 2048,
|
| 35 |
+
"device": "cuda:0"
|
| 36 |
+
}
|
| 37 |
+
}
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-l2_norm-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_7_seed1/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 8.431988629427822, "l1_loss": 119.58685487689394, "l0": 60.0, "frac_variance_explained": 0.8706758040370364, "cossim": 0.8717323650013317, "l2_ratio": 0.8721790367906744, "relative_reconstruction_bias": 0.9994440403851595, "loss_original": 2.6030195698593603, "loss_reconstructed": 5.004976113637288, "loss_zero": 11.09529752442331, "frac_recovered": 0.6917261535471136, "frac_alive": 0.0028076171875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_0_seed0/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05d111b079974378cc01a05257b1d129eebfa405711a51aee9ef5884dfcb822e
|
| 3 |
+
size 403183898
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_0_seed0/config.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "HybridSampledTopKTrainer",
|
| 4 |
+
"dict_class": "HybridSampledTopKSAE",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 12207,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 9765,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"sampling_update_freq": 1,
|
| 13 |
+
"sampling_method": "leverage",
|
| 14 |
+
"ridge_lambda": 0.01,
|
| 15 |
+
"sketching_size": 100,
|
| 16 |
+
"top_k_aux": 384,
|
| 17 |
+
"seed": 0,
|
| 18 |
+
"activation_dim": 768,
|
| 19 |
+
"dict_size": 65536,
|
| 20 |
+
"k": 60,
|
| 21 |
+
"device": "cuda:0",
|
| 22 |
+
"layer": 6,
|
| 23 |
+
"lm_name": "EleutherAI/pythia-160m-deduped",
|
| 24 |
+
"wandb_name": "HybridSampledTopKTrainer-leverage-EleutherAI/pythia-160m-deduped-resid_post_layer_6_trainer_0",
|
| 25 |
+
"submodule_name": "resid_post_layer_6",
|
| 26 |
+
"l_multiplier": 3.0
|
| 27 |
+
},
|
| 28 |
+
"buffer": {
|
| 29 |
+
"d_submodule": 768,
|
| 30 |
+
"io": "out",
|
| 31 |
+
"n_ctxs": 244,
|
| 32 |
+
"ctx_len": 1024,
|
| 33 |
+
"refresh_batch_size": 32,
|
| 34 |
+
"out_batch_size": 2048,
|
| 35 |
+
"device": "cuda:0"
|
| 36 |
+
}
|
| 37 |
+
}
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_0_seed0/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 8.434096278566303, "l1_loss": 122.03498655377012, "l0": 60.0, "frac_variance_explained": 0.8695206931143096, "cossim": 0.8717283985831521, "l2_ratio": 0.8716153412154226, "relative_reconstruction_bias": 1.0004283731633967, "loss_original": 2.6030195698593603, "loss_reconstructed": 5.244064215457801, "loss_zero": 11.09529752442331, "frac_recovered": 0.6713318048101483, "frac_alive": 0.002777099609375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_4_seed1/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:447f96fb0647ae6603aae4867300adb8f7820b9975d778ae447d4e15f7ca45ac
|
| 3 |
+
size 403183898
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_4_seed1/config.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "HybridSampledTopKTrainer",
|
| 4 |
+
"dict_class": "HybridSampledTopKSAE",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 12207,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 9765,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"sampling_update_freq": 1,
|
| 13 |
+
"sampling_method": "leverage",
|
| 14 |
+
"ridge_lambda": 0.01,
|
| 15 |
+
"sketching_size": 100,
|
| 16 |
+
"top_k_aux": 384,
|
| 17 |
+
"seed": 1,
|
| 18 |
+
"activation_dim": 768,
|
| 19 |
+
"dict_size": 65536,
|
| 20 |
+
"k": 60,
|
| 21 |
+
"device": "cuda:0",
|
| 22 |
+
"layer": 6,
|
| 23 |
+
"lm_name": "EleutherAI/pythia-160m-deduped",
|
| 24 |
+
"wandb_name": "HybridSampledTopKTrainer-leverage-EleutherAI/pythia-160m-deduped-resid_post_layer_6_trainer_4",
|
| 25 |
+
"submodule_name": "resid_post_layer_6",
|
| 26 |
+
"l_multiplier": 3.0
|
| 27 |
+
},
|
| 28 |
+
"buffer": {
|
| 29 |
+
"d_submodule": 768,
|
| 30 |
+
"io": "out",
|
| 31 |
+
"n_ctxs": 244,
|
| 32 |
+
"ctx_len": 1024,
|
| 33 |
+
"refresh_batch_size": 32,
|
| 34 |
+
"out_batch_size": 2048,
|
| 35 |
+
"device": "cuda:0"
|
| 36 |
+
}
|
| 37 |
+
}
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_4_seed1/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 8.432261698173754, "l1_loss": 119.4970097397313, "l0": 60.0, "frac_variance_explained": 0.8702907815124049, "cossim": 0.8717991763895209, "l2_ratio": 0.8722440781015338, "relative_reconstruction_bias": 0.9994456984780051, "loss_original": 2.6030195698593603, "loss_reconstructed": 5.004976113637288, "loss_zero": 11.09529752442331, "frac_recovered": 0.6917261535471136, "frac_alive": 0.002838134765625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_8_seed2/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1fef9b961427fe1bea11bcee163e102e880a793800e4dd3b93c846ab214d9283
|
| 3 |
+
size 403183898
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_8_seed2/config.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "HybridSampledTopKTrainer",
|
| 4 |
+
"dict_class": "HybridSampledTopKSAE",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 12207,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 9765,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"sampling_update_freq": 1,
|
| 13 |
+
"sampling_method": "leverage",
|
| 14 |
+
"ridge_lambda": 0.01,
|
| 15 |
+
"sketching_size": 100,
|
| 16 |
+
"top_k_aux": 384,
|
| 17 |
+
"seed": 2,
|
| 18 |
+
"activation_dim": 768,
|
| 19 |
+
"dict_size": 65536,
|
| 20 |
+
"k": 60,
|
| 21 |
+
"device": "cuda:0",
|
| 22 |
+
"layer": 6,
|
| 23 |
+
"lm_name": "EleutherAI/pythia-160m-deduped",
|
| 24 |
+
"wandb_name": "HybridSampledTopKTrainer-leverage-EleutherAI/pythia-160m-deduped-resid_post_layer_6_trainer_8",
|
| 25 |
+
"submodule_name": "resid_post_layer_6",
|
| 26 |
+
"l_multiplier": 3.0
|
| 27 |
+
},
|
| 28 |
+
"buffer": {
|
| 29 |
+
"d_submodule": 768,
|
| 30 |
+
"io": "out",
|
| 31 |
+
"n_ctxs": 244,
|
| 32 |
+
"ctx_len": 1024,
|
| 33 |
+
"refresh_batch_size": 32,
|
| 34 |
+
"out_batch_size": 2048,
|
| 35 |
+
"device": "cuda:0"
|
| 36 |
+
}
|
| 37 |
+
}
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-leverage-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_8_seed2/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 8.441039461078066, "l1_loss": 121.47962581750119, "l0": 60.0, "frac_variance_explained": 0.870531931067958, "cossim": 0.8715875419703397, "l2_ratio": 0.871613925153559, "relative_reconstruction_bias": 0.9984585823434772, "loss_original": 2.6030195698593603, "loss_reconstructed": 5.118368091005268, "loss_zero": 11.09529752442331, "frac_recovered": 0.6774183656230117, "frac_alive": 0.0028228759765625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_10_seed2/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d5c6f4484cd1f28f0bc36ec467e049f992fef32cbb8f62818735fb4f31e7341
|
| 3 |
+
size 403183898
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_10_seed2/config.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "HybridSampledTopKTrainer",
|
| 4 |
+
"dict_class": "HybridSampledTopKSAE",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 12207,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 9765,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"sampling_update_freq": 1,
|
| 13 |
+
"sampling_method": "uniform",
|
| 14 |
+
"ridge_lambda": 0.01,
|
| 15 |
+
"sketching_size": 100,
|
| 16 |
+
"top_k_aux": 384,
|
| 17 |
+
"seed": 2,
|
| 18 |
+
"activation_dim": 768,
|
| 19 |
+
"dict_size": 65536,
|
| 20 |
+
"k": 60,
|
| 21 |
+
"device": "cuda:0",
|
| 22 |
+
"layer": 6,
|
| 23 |
+
"lm_name": "EleutherAI/pythia-160m-deduped",
|
| 24 |
+
"wandb_name": "HybridSampledTopKTrainer-uniform-EleutherAI/pythia-160m-deduped-resid_post_layer_6_trainer_10",
|
| 25 |
+
"submodule_name": "resid_post_layer_6",
|
| 26 |
+
"l_multiplier": 3.0
|
| 27 |
+
},
|
| 28 |
+
"buffer": {
|
| 29 |
+
"d_submodule": 768,
|
| 30 |
+
"io": "out",
|
| 31 |
+
"n_ctxs": 244,
|
| 32 |
+
"ctx_len": 1024,
|
| 33 |
+
"refresh_batch_size": 32,
|
| 34 |
+
"out_batch_size": 2048,
|
| 35 |
+
"device": "cuda:0"
|
| 36 |
+
}
|
| 37 |
+
}
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_10_seed2/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 8.463962757226193, "l1_loss": 130.3787534309156, "l0": 60.0, "frac_variance_explained": 0.86707542701201, "cossim": 0.870789596528718, "l2_ratio": 0.8709013696872827, "relative_reconstruction_bias": 0.99878458182017, "loss_original": 2.6030195698593603, "loss_reconstructed": 4.404964736013701, "loss_zero": 11.09529752442331, "frac_recovered": 0.7763142224514124, "frac_alive": 0.00274658203125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_2_seed0/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f43cc97c1c381ac656f198ea181e4d4924bdd6f91288bb6eb8e71326fcc45aee
|
| 3 |
+
size 403183898
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_2_seed0/config.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "HybridSampledTopKTrainer",
|
| 4 |
+
"dict_class": "HybridSampledTopKSAE",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 12207,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 9765,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"sampling_update_freq": 1,
|
| 13 |
+
"sampling_method": "uniform",
|
| 14 |
+
"ridge_lambda": 0.01,
|
| 15 |
+
"sketching_size": 100,
|
| 16 |
+
"top_k_aux": 384,
|
| 17 |
+
"seed": 0,
|
| 18 |
+
"activation_dim": 768,
|
| 19 |
+
"dict_size": 65536,
|
| 20 |
+
"k": 60,
|
| 21 |
+
"device": "cuda:0",
|
| 22 |
+
"layer": 6,
|
| 23 |
+
"lm_name": "EleutherAI/pythia-160m-deduped",
|
| 24 |
+
"wandb_name": "HybridSampledTopKTrainer-uniform-EleutherAI/pythia-160m-deduped-resid_post_layer_6_trainer_2",
|
| 25 |
+
"submodule_name": "resid_post_layer_6",
|
| 26 |
+
"l_multiplier": 3.0
|
| 27 |
+
},
|
| 28 |
+
"buffer": {
|
| 29 |
+
"d_submodule": 768,
|
| 30 |
+
"io": "out",
|
| 31 |
+
"n_ctxs": 244,
|
| 32 |
+
"ctx_len": 1024,
|
| 33 |
+
"refresh_batch_size": 32,
|
| 34 |
+
"out_batch_size": 2048,
|
| 35 |
+
"device": "cuda:0"
|
| 36 |
+
}
|
| 37 |
+
}
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_2_seed0/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 8.452134623672023, "l1_loss": 122.55520259972775, "l0": 60.0, "frac_variance_explained": 0.8694721062978109, "cossim": 0.871168497836951, "l2_ratio": 0.8718094103264086, "relative_reconstruction_bias": 1.001582398559108, "loss_original": 2.6030195698593603, "loss_reconstructed": 4.402381441809914, "loss_zero": 11.09529752442331, "frac_recovered": 0.7762271111661737, "frac_alive": 0.00274658203125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_6_seed1/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:808d33e8472d9704f12c85fa27a9c4207bc647bd39828baa05e5066d9288b53e
|
| 3 |
+
size 403183898
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_6_seed1/config.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "HybridSampledTopKTrainer",
|
| 4 |
+
"dict_class": "HybridSampledTopKSAE",
|
| 5 |
+
"lr": 0.0003,
|
| 6 |
+
"steps": 12207,
|
| 7 |
+
"auxk_alpha": 0.03125,
|
| 8 |
+
"warmup_steps": 1000,
|
| 9 |
+
"decay_start": 9765,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"sampling_update_freq": 1,
|
| 13 |
+
"sampling_method": "uniform",
|
| 14 |
+
"ridge_lambda": 0.01,
|
| 15 |
+
"sketching_size": 100,
|
| 16 |
+
"top_k_aux": 384,
|
| 17 |
+
"seed": 1,
|
| 18 |
+
"activation_dim": 768,
|
| 19 |
+
"dict_size": 65536,
|
| 20 |
+
"k": 60,
|
| 21 |
+
"device": "cuda:0",
|
| 22 |
+
"layer": 6,
|
| 23 |
+
"lm_name": "EleutherAI/pythia-160m-deduped",
|
| 24 |
+
"wandb_name": "HybridSampledTopKTrainer-uniform-EleutherAI/pythia-160m-deduped-resid_post_layer_6_trainer_6",
|
| 25 |
+
"submodule_name": "resid_post_layer_6",
|
| 26 |
+
"l_multiplier": 3.0
|
| 27 |
+
},
|
| 28 |
+
"buffer": {
|
| 29 |
+
"d_submodule": 768,
|
| 30 |
+
"io": "out",
|
| 31 |
+
"n_ctxs": 244,
|
| 32 |
+
"ctx_len": 1024,
|
| 33 |
+
"refresh_batch_size": 32,
|
| 34 |
+
"out_batch_size": 2048,
|
| 35 |
+
"device": "cuda:0"
|
| 36 |
+
}
|
| 37 |
+
}
|
._seeded_sae_bundle_EleutherAI_pythia-160m-deduped_hybrid_sampled_top_k/resid_post_layer_6/HybridSampledTopKTrainer-uniform-EleutherAI_pythia-160m-deduped-resid_post_layer_6_trainer_6_seed1/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 8.463646339647697, "l1_loss": 130.58196420380563, "l0": 60.0, "frac_variance_explained": 0.8669460072661891, "cossim": 0.8709347663503705, "l2_ratio": 0.8710685751654885, "relative_reconstruction_bias": 1.0013282949274236, "loss_original": 2.6030195698593603, "loss_reconstructed": 4.356847734162302, "loss_zero": 11.09529752442331, "frac_recovered": 0.7823429197976084, "frac_alive": 0.00274658203125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|