Vladimir Zaigrajew
committed on
Commit
·
e6a8524
1
Parent(s):
bfd0920
Replaced files with good ones after I broke everything like an hour ago with push --force
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +0 -0
- README.md +109 -10
- ViT-B_16/centered/16384_512_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512.pth +3 -0
- ViT-B_16/centered/16384_512_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512.pth +3 -0
- ViT-B_16/centered/4096_512_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512.pth +3 -0
- ViT-B_16/centered/4096_512_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512.pth +3 -0
- ViT-B_16/centered/8192_512_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512.pth +3 -0
- ViT-B_16/centered/8192_512_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512.pth +3 -0
- ViT-B_16/centered/Concept_Interpreter_16384_512_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512_disect_ViT-B~16_-1_text_20000_512.npy +3 -0
- ViT-B_16/centered/Concept_Interpreter_16384_512_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512_disect_ViT-B~16_-1_text_20000_512.npy +3 -0
- ViT-B_16/centered/Concept_Interpreter_4096_512_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512_disect_ViT-B~16_-1_text_20000_512.npy +3 -0
- ViT-B_16/centered/Concept_Interpreter_4096_512_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512_disect_ViT-B~16_-1_text_20000_512.npy +3 -0
- ViT-B_16/centered/Concept_Interpreter_8192_512_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512_disect_ViT-B~16_-1_text_20000_512.npy +3 -0
- ViT-B_16/centered/Concept_Interpreter_8192_512_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512_disect_ViT-B~16_-1_text_20000_512.npy +3 -0
- ViT-B_16/not_centered/16384_512_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512.pth +3 -0
- ViT-B_16/not_centered/16384_512_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512.pth +3 -0
- ViT-B_16/not_centered/4096_512_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512.pth +3 -0
- ViT-B_16/not_centered/4096_512_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512.pth +3 -0
- ViT-B_16/not_centered/8192_512_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512.pth +3 -0
- ViT-B_16/not_centered/8192_512_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512.pth +3 -0
- ViT-B_16/not_centered/Concept_Interpreter_16384_512_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512_disect_ViT-B~16_-1_text_20000_512.npy +3 -0
- ViT-B_16/not_centered/Concept_Interpreter_16384_512_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512_disect_ViT-B~16_-1_text_20000_512.npy +3 -0
- ViT-B_16/not_centered/Concept_Interpreter_4096_512_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512_disect_ViT-B~16_-1_text_20000_512.npy +3 -0
- ViT-B_16/not_centered/Concept_Interpreter_4096_512_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512_disect_ViT-B~16_-1_text_20000_512.npy +3 -0
- ViT-B_16/not_centered/Concept_Interpreter_8192_512_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512_disect_ViT-B~16_-1_text_20000_512.npy +3 -0
- ViT-B_16/not_centered/Concept_Interpreter_8192_512_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512_disect_ViT-B~16_-1_text_20000_512.npy +3 -0
- ViT-L_14/centered/12288_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768.pth +3 -0
- ViT-L_14/centered/12288_768_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768.pth +3 -0
- ViT-L_14/centered/24576_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768.pth +3 -0
- ViT-L_14/centered/24576_768_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768.pth +3 -0
- 6144_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768.pth → ViT-L_14/centered/6144_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768.pth +0 -0
- ViT-L_14/centered/6144_768_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768.pth +3 -0
- ViT-L_14/centered/Concept_Interpreter_12288_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768_disect_ViT-L~14_-1_text_20000_768.npy +3 -0
- ViT-L_14/centered/Concept_Interpreter_12288_768_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768_disect_ViT-L~14_-1_text_20000_768.npy +3 -0
- ViT-L_14/centered/Concept_Interpreter_24576_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768_disect_ViT-L~14_-1_text_20000_768.npy +3 -0
- ViT-L_14/centered/Concept_Interpreter_24576_768_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768_disect_ViT-L~14_-1_text_20000_768.npy +3 -0
- Concept_Interpreter_6144_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768_disect_ViT-L~14_-1_text_20000_768.npy → ViT-L_14/centered/Concept_Interpreter_6144_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768_disect_ViT-L~14_-1_text_20000_768.npy +0 -0
- ViT-L_14/centered/Concept_Interpreter_6144_768_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768_disect_ViT-L~14_-1_text_20000_768.npy +3 -0
- ViT-L_14/not_centered/._.DS_Store +0 -0
- ViT-L_14/not_centered/12288_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768.pth +3 -0
- ViT-L_14/not_centered/12288_768_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768.pth +3 -0
- ViT-L_14/not_centered/24576_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768.pth +3 -0
- ViT-L_14/not_centered/24576_768_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768.pth +3 -0
- ViT-L_14/not_centered/6144_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768.pth +3 -0
- ViT-L_14/not_centered/6144_768_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768.pth +3 -0
- ViT-L_14/not_centered/Concept_Interpreter_12288_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768_disect_ViT-L~14_-1_text_20000_768.npy +3 -0
- ViT-L_14/not_centered/Concept_Interpreter_12288_768_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768_disect_ViT-L~14_-1_text_20000_768.npy +3 -0
- ViT-L_14/not_centered/Concept_Interpreter_24576_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768_disect_ViT-L~14_-1_text_20000_768.npy +3 -0
- ViT-L_14/not_centered/Concept_Interpreter_24576_768_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768_disect_ViT-L~14_-1_text_20000_768.npy +3 -0
- ViT-L_14/not_centered/Concept_Interpreter_6144_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768_disect_ViT-L~14_-1_text_20000_768.npy +3 -0
.gitattributes
CHANGED
File without changes
|
README.md
CHANGED
@@ -1,14 +1,113 @@
|
|
1 |
---
|
2 |
-
title: MSAE
|
3 |
-
emoji: 🌖
|
4 |
-
colorFrom: gray
|
5 |
-
colorTo: pink
|
6 |
-
sdk: gradio
|
7 |
-
sdk_version: 5.38.0
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
license: mit
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
---
|
13 |
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
license: mit
|
3 |
+
datasets:
|
4 |
+
- pixparse/cc3m-wds
|
5 |
+
base_model:
|
6 |
+
- openai/clip-vit-large-patch14
|
7 |
+
- openai/clip-vit-base-patch16
|
8 |
+
tags:
|
9 |
+
- clip
|
10 |
+
- vision
|
11 |
+
- text
|
12 |
---
|
13 |
|
14 |
+
# Matryoshka Sparse Autoencoders (MSAE) for CLIP
|
15 |
+
|
16 |
+
This repository provides PyTorch implementations of Matryoshka Sparse Autoencoders (MSAEs) trained on the image encoder of CLIP (ViT-L/14 and ViT-B/16). These models are designed to learn interpretable, hierarchical features from complex multimodal representations.
|
17 |
+
|
18 |
+
For a deeper dive into the underlying theory and the full research implementation, please see the original [MSAE repository](https://github.com/WolodjaZ/MSAE) and the accompanying paper.
|
19 |
+
|
20 |
+
## What is a Sparse Autoencoder (SAE)?
|
21 |
+
|
22 |
+
Sparse autoencoders (SAEs) are useful for detecting and steering interpretable features within complex neural networks. They learn to represent complex data in a sparse manner, meaning that only a small number of neurons are active at any given time, yet these are enough to reconstruct the input data. This sparsity leads to more interpretable representations, as each active neuron can be associated with a specific feature or concept. As a result, SAEs can be used to identify and manipulate specific features in the data, making them powerful tools for understanding and controlling the behavior of neural networks.
|
23 |
+
|
24 |
+
## Key Features
|
25 |
+
|
26 |
+
- **Interpretability**: SAEs learn to decompose complex representations into sparse, interpretable features. This allows for a better understanding of what the model has learned.
|
27 |
+
- **Hierarchical Features**: The Matryoshka SAE (MSAE) architecture learns features at multiple granularities simultaneously, from fine-grained details to high-level concepts.
|
28 |
+
- **Model Steering**: By identifying and manipulating specific features, you can steer the behavior of the CLIP model.
|
29 |
+
- **Simple Integration**: The provided `sae.py` module allows for easy loading and integration of the trained models into your own projects.
|
30 |
+
|
31 |
+
## Repository Structure
|
32 |
+
|
33 |
+
The repository is organized as follows:
|
34 |
+
|
35 |
+
- `sae.py`: A self-contained Python module with the SAE and MSAE model implementations needed to run inference.
|
36 |
+
- `clip_disect_20k.txt`: A vocabulary file containing 20,000 concept names used for interpreting the learned features.
|
37 |
+
- `ViT-L_14/`: Contains the trained SAE models for the CLIP ViT-L/14 image encoder.
|
38 |
+
- `ViT-B_16/`: Contains the trained SAE models for the CLIP ViT-B/16 image encoder.
|
39 |
+
|
40 |
+
Each model directory (`ViT-L_14` and `ViT-B_16`) is further subdivided into:
|
41 |
+
|
42 |
+
- `centered/`: Models trained on mean-centered features.
|
43 |
+
- `not_centered/`: Models trained on non-centered features.
|
44 |
+
|
45 |
+
Additionally, each directory contains `.pth` files for the model weights and `.npy` files for the concept matching scores.
|
46 |
+
|
47 |
+
## Understanding the Model Names
|
48 |
+
|
49 |
+
The model filenames follow a consistent naming convention that encodes the model's hyperparameters. Here's how to interpret a typical filename:
|
50 |
+
|
51 |
+
`{n_latents}_{n_inputs}_{activation}_{k}_{weighting}_{tied}_{normalized}_{soft_cap}_{dataset}.pth`
|
52 |
+
|
53 |
+
Where:
|
54 |
+
|
55 |
+
- `n_latents`: The number of latent features in the SAE.
|
56 |
+
- `n_inputs`: The input dimensionality (e.g., 768 for ViT-L/14, 512 for ViT-B/16).
|
57 |
+
- `activation`: The activation function used (e.g., `TopKReLU`).
|
58 |
+
- `k`: The smallest number of active latents used during training for the `TopK` activation.
|
59 |
+
- `weighting`: Whether the model was trained with uniform weighting (UW) or reverse weighting (RW).
|
60 |
+
- `tied`: Indicates if the model encoder is tied to the decoder.
|
61 |
+
- `normalized`: Indicates if the model was trained with normalized inputs.
|
62 |
+
- `soft_cap`: The soft-capping value applied to the latent features (`0.0` in all released models).
|
63 |
+
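- `dataset`: The dataset used for training (e.g., `cc3m`). In the released filenames it is followed by further identifiers, e.g. `cc3m_ViT-L~14_train_image_2905936_768`.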
|
64 |
+
|
65 |
+
The concept matching scores are stored in `.npy` files with a similar naming convention: `Concept_Interpreter_{model_name}_{vocab_name}.npy`, where `vocab_name` indicates the vocabulary used for concept matching.
|
66 |
+
|
67 |
+
## How to Use
|
68 |
+
|
69 |
+
To get started, you'll need to have PyTorch, NumPy, and `huggingface_hub` installed.
|
70 |
+
|
71 |
+
```bash
|
72 |
+
pip install torch numpy huggingface_hub
|
73 |
+
```
|
74 |
+
|
75 |
+
First, copy the `sae.py` and `clip_disect_20k.txt` files to your working directory. Then, you can load a model and its corresponding concept vocabulary as follows:
|
76 |
+
|
77 |
+
```python
|
78 |
+
import torch
|
79 |
+
import numpy as np
|
80 |
+
from sae import SAE
|
81 |
+
from huggingface_hub import hf_hub_download
|
82 |
+
|
83 |
+
# Download the SAE model weights
|
84 |
+
weights_path = hf_hub_download(
|
85 |
+
repo_id="WolodjaZ/MSAE",
|
86 |
+
filename="ViT-L_14/centered/6144_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768.pth"
|
87 |
+
)
|
88 |
+
sae_model = SAE(weights_path)
|
89 |
+
|
90 |
+
# Download the concept matching scores for the model
|
91 |
+
vocab_path = hf_hub_download(
|
92 |
+
repo_id="WolodjaZ/MSAE",
|
93 |
+
filename="ViT-L_14/centered/Concept_Interpreter_6144_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768_disect_ViT-L~14_-1_text_20000_768.npy"
|
94 |
+
)
|
95 |
+
concept_match_scores = np.load(vocab_path)
|
96 |
+
|
97 |
+
# Load the vocabulary names
|
98 |
+
with open('clip_disect_20k.txt', 'r') as f:
|
99 |
+
vocab_names = [line.strip() for line in f.readlines()]
|
100 |
+
|
101 |
+
print(f"Concept match scores shape: {concept_match_scores.shape}")
|
102 |
+
print(f"Vocabulary size: {len(vocab_names)}")
|
103 |
+
|
104 |
+
# Now you can use the model to encode and decode your own data
|
105 |
+
# For a detailed example, please refer to the demo notebook in the original repository:
|
106 |
+
# https://github.com/WolodjaZ/MSAE/blob/main/demo.ipynb
|
107 |
+
```
|
108 |
+
|
109 |
+
This example demonstrates how to load a specific SAE model and its associated concept names. You can adapt the `filename` in `hf_hub_download` to load any of the other available models. For a complete guide on how to use the model for feature extraction and steering, please refer to the [demo notebook](https://github.com/WolodjaZ/MSAE/blob/main/demo.ipynb) in the original MSAE repository.
|
110 |
+
|
111 |
+
## Citation
|
112 |
+
|
113 |
+
Paper: https://arxiv.org/abs/2502.20578
|
ViT-B_16/centered/16384_512_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f87530f9bba1b8366b2213b3ad23d497427d9bddefb4862de0030955c92e98f2
|
3 |
+
size 67314062
|
ViT-B_16/centered/16384_512_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8780986a61a4ef0c4181f5204966027980e34f8dbbfc3ad0fa0b723f70fa5509
|
3 |
+
size 67314062
|
ViT-B_16/centered/4096_512_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:28d923c96b7d26a4fab04e20f9dbd4d223cf1f512fc847104dd7a53dc7d73035
|
3 |
+
size 16834948
|
ViT-B_16/centered/4096_512_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1794297e3b88c6a1dee2affa50e28d2e5d132c388dbaf7904dd61e32547b3815
|
3 |
+
size 16834948
|
ViT-B_16/centered/8192_512_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:26ad0cec5b81140885e9331ff0367956149f76b472b11fb7ee335e7e840c6bd3
|
3 |
+
size 33661316
|
ViT-B_16/centered/8192_512_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd80b19a40cfd5dd9b28feeddff95c6429e2929c6fe280fed4c191b24f7ca1ad
|
3 |
+
size 33661316
|
ViT-B_16/centered/Concept_Interpreter_16384_512_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512_disect_ViT-B~16_-1_text_20000_512.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41064a0812be9559cb5c1e789f8cf4ec02732fa167ff92d014b047757779d749
|
3 |
+
size 1310720128
|
ViT-B_16/centered/Concept_Interpreter_16384_512_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512_disect_ViT-B~16_-1_text_20000_512.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06c9013aae027f6c206a5423da29bd92cf7b81ef63bdf1bb23c170a7e98c3b5a
|
3 |
+
size 1310720128
|
ViT-B_16/centered/Concept_Interpreter_4096_512_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512_disect_ViT-B~16_-1_text_20000_512.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1aefad1d81ba571c859493d094890902fb388a2950404bfccb98b2b4a31a4ad8
|
3 |
+
size 327680128
|
ViT-B_16/centered/Concept_Interpreter_4096_512_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512_disect_ViT-B~16_-1_text_20000_512.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95e4b4c20c15212778fa109da604e2ce015d977aa20414d74274407afef0f4de
|
3 |
+
size 327680128
|
ViT-B_16/centered/Concept_Interpreter_8192_512_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512_disect_ViT-B~16_-1_text_20000_512.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:871db72478420c54dc686816152f82e8f3bd9e75bf88fb5848aa74c372eddf51
|
3 |
+
size 655360128
|
ViT-B_16/centered/Concept_Interpreter_8192_512_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512_disect_ViT-B~16_-1_text_20000_512.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ecdefa4e94a864a6f108281143c73e72b592865cf3b891e1324f05de218fc4da
|
3 |
+
size 655360128
|
ViT-B_16/not_centered/16384_512_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8883a4cc01604f24f76d379f304e1427bbfb22fed33344210f1cf70f26938a58
|
3 |
+
size 67314062
|
ViT-B_16/not_centered/16384_512_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e30463d780a1e61739335d8c6c9dc703831704af30c222f796fa16b2b1abeae
|
3 |
+
size 67314062
|
ViT-B_16/not_centered/4096_512_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a4815f602c2fbd9bdbc5fc401d778b14ac69218639f5c180ddc528c733a3965
|
3 |
+
size 16834948
|
ViT-B_16/not_centered/4096_512_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:acbcb059adb3ad773f5790fe479ad35ef3ef8ea0a2b8ef7703106f8f7dfda6ef
|
3 |
+
size 16834948
|
ViT-B_16/not_centered/8192_512_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3a2e8919d2b6ae1ba055c7cb1111039e9083e1bcd9c2ff79864e8943b4897ff1
|
3 |
+
size 33661316
|
ViT-B_16/not_centered/8192_512_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fa782f88b1a37a218ccbc40399b6d1394aa42747e84433dd776aaacc5888a344
|
3 |
+
size 33661316
|
ViT-B_16/not_centered/Concept_Interpreter_16384_512_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512_disect_ViT-B~16_-1_text_20000_512.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8cc88a52b473247d088f0f65a1ee58a4864da31af215a9076821b0fbe59b5814
|
3 |
+
size 1310720128
|
ViT-B_16/not_centered/Concept_Interpreter_16384_512_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512_disect_ViT-B~16_-1_text_20000_512.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ec9aab849549e3fce328a8bfbb93f65039021402ea5c35504991d46205752b1
|
3 |
+
size 1310720128
|
ViT-B_16/not_centered/Concept_Interpreter_4096_512_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512_disect_ViT-B~16_-1_text_20000_512.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c4fe591941506764938b9e3ba5bc52133765f2911b24c232e202a70773b899e7
|
3 |
+
size 327680128
|
ViT-B_16/not_centered/Concept_Interpreter_4096_512_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512_disect_ViT-B~16_-1_text_20000_512.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95aa6a46a5302eac986b6e21b17f712c2266c2ac0d2d5536c915221c1d7d8cb2
|
3 |
+
size 327680128
|
ViT-B_16/not_centered/Concept_Interpreter_8192_512_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512_disect_ViT-B~16_-1_text_20000_512.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:72fb8fe9b90785663d7d73cd92683c6ed912b69a9f7a8db2a08f9a367ced18dd
|
3 |
+
size 655360128
|
ViT-B_16/not_centered/Concept_Interpreter_8192_512_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-B~16_train_image_2905936_512_disect_ViT-B~16_-1_text_20000_512.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dc2b0f649d84caa867172d3fb9d141cc9d833aa4f5160e426de82c51b066dc2f
|
3 |
+
size 655360128
|
ViT-L_14/centered/12288_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d0f6016b5914c24a178d07241fe7c20cc74936fe8dec60afac16101dbd393e5
|
3 |
+
size 75655630
|
ViT-L_14/centered/12288_768_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:31e37f1f1510451ac51805bd66119e0bdae8138441ca1ded369d5f206479ded7
|
3 |
+
size 75655630
|
ViT-L_14/centered/24576_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d5fc87254b011b312fbef112df2d239e649f8de2173cae25ac3c39099e29c6e
|
3 |
+
size 151300558
|
ViT-L_14/centered/24576_768_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cb8b13eb302c8494b8af4c82f76d4af1fe54455b2aa6dc8af7deb6e5ffe8ab15
|
3 |
+
size 151300558
|
6144_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768.pth → ViT-L_14/centered/6144_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768.pth
RENAMED
File without changes
|
ViT-L_14/centered/6144_768_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:816390b726fff214280ae527f16caa2d27274ecd85c1be0ee93a1506de8a6941
|
3 |
+
size 37833156
|
ViT-L_14/centered/Concept_Interpreter_12288_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768_disect_ViT-L~14_-1_text_20000_768.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f5d7412d7020c8a52052b821e9428bd8e798d0dbdf05b3f9cd2832db27c11dee
|
3 |
+
size 983040128
|
ViT-L_14/centered/Concept_Interpreter_12288_768_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768_disect_ViT-L~14_-1_text_20000_768.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:923f2eadbca66a0b872e344b6e41098248ec1052cfee114080b7b09b12a0882d
|
3 |
+
size 983040128
|
ViT-L_14/centered/Concept_Interpreter_24576_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768_disect_ViT-L~14_-1_text_20000_768.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c8363b945516e42a38f9e86f02300051cc9e132039f53bca7b40684a06557adc
|
3 |
+
size 1966080128
|
ViT-L_14/centered/Concept_Interpreter_24576_768_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768_disect_ViT-L~14_-1_text_20000_768.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:93b8c7c2a58d6546684a448b3a163d55fa6a1b07fd454c385203eacf00826e82
|
3 |
+
size 1966080128
|
Concept_Interpreter_6144_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768_disect_ViT-L~14_-1_text_20000_768.npy → ViT-L_14/centered/Concept_Interpreter_6144_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768_disect_ViT-L~14_-1_text_20000_768.npy
RENAMED
File without changes
|
ViT-L_14/centered/Concept_Interpreter_6144_768_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768_disect_ViT-L~14_-1_text_20000_768.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:27481bc1557944d8ba6d8b36d405d4019e49285b5aafc80733c898ef62c315e4
|
3 |
+
size 491520128
|
ViT-L_14/not_centered/._.DS_Store
ADDED
Binary file (4.1 kB). View file
|
|
ViT-L_14/not_centered/12288_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8386aa9529c4833f22188a51c5689be63f093d2a9a1e8d41df3b2440ce4c685f
|
3 |
+
size 75655630
|
ViT-L_14/not_centered/12288_768_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:030daf3813fdcea96093ed7e4d1c0297b090cadce911409047005484b524be88
|
3 |
+
size 75655630
|
ViT-L_14/not_centered/24576_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a4b154fa581c5d7e793ffffbcc46dfc03c4b6cfb7f8ad474f5c327125f4d091
|
3 |
+
size 151300558
|
ViT-L_14/not_centered/24576_768_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c368c8ea0d9c1b14c2c58c03689dcfdd6258833f6ab41103a30ccdff3c91ec43
|
3 |
+
size 151300558
|
ViT-L_14/not_centered/6144_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed5f569cea3cd0da932e3fd92bf326d9fc8d3291f4ebf50f20a38558ea33ada2
|
3 |
+
size 37833156
|
ViT-L_14/not_centered/6144_768_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:975b8526c5f0bc38f8e8ceeb496403d8710b988f548e8ad43c811ea1498e5d7c
|
3 |
+
size 37833156
|
ViT-L_14/not_centered/Concept_Interpreter_12288_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768_disect_ViT-L~14_-1_text_20000_768.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b23db51cc483b1e5079a8a70ba72216209c23b75d24ffa479986f9d9389b3fa6
|
3 |
+
size 983040128
|
ViT-L_14/not_centered/Concept_Interpreter_12288_768_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768_disect_ViT-L~14_-1_text_20000_768.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:63e47558d5805593c7e448838eaae13ed68301322d97d4769fa6ebb3abec3455
|
3 |
+
size 983040128
|
ViT-L_14/not_centered/Concept_Interpreter_24576_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768_disect_ViT-L~14_-1_text_20000_768.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dfd037dbd6b6bc76cf61e2e1060ce0a253ca1e76f43c2a440f0679df95f432b1
|
3 |
+
size 1966080128
|
ViT-L_14/not_centered/Concept_Interpreter_24576_768_TopKReLU_64_UW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768_disect_ViT-L~14_-1_text_20000_768.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4afa8726f7d317476dee2a881b737a5f7a38955cf17e609d10d3d9a6c0254073
|
3 |
+
size 1966080128
|
ViT-L_14/not_centered/Concept_Interpreter_6144_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768_disect_ViT-L~14_-1_text_20000_768.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:757bd2a3200eb2e5c9eef08bd916a84bc1db2ba3f1c6b682c5879810a4cf1f87
|
3 |
+
size 491520128
|