prithivida committed on
Commit
762be6a
·
verified ·
1 Parent(s): c00f7d0

sentence_transformers_support (#2)

Browse files

- Add support to Sentence Transformers (64b5b5e359313c99797571a06b5c17679b2a2cce)
- Update README.md (eab0e1791d4e026cac499dd3a0a23f3463731051)

1_SpladePooling/config.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "pooling_strategy": "max",
3
+ "activation_function": "relu",
4
+ "word_embedding_dimension": null
5
+ }
README.md CHANGED
@@ -12,9 +12,12 @@ tags:
12
  - passage-retrieval
13
  - knowledge-distillation
14
  - document encoder
 
 
 
15
  pretty_name: Independent Implementation of SPLADE++ Model with some efficiency tweaks for Industry setting.
16
- library_name: transformers
17
- pipeline_tag: fill-mask
18
  new_version: prithivida/Splade_PP_en_v2
19
  ---
20
 
@@ -199,9 +202,49 @@ sparse_rep = expander.expand(
199
  ["The Manhattan Project and its atomic bomb helped bring an end to World War II. Its legacy of peaceful uses of atomic energy continues to have an impact on history and science."])
200
  ```
201
 
202
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
 
204
- ## 6c. With HuggingFace
205
 
206
  **NOTEBOOK user? Login first**
207
 
 
12
  - passage-retrieval
13
  - knowledge-distillation
14
  - document encoder
15
+ - sparse-encoder
16
+ - sparse
17
+ - splade
18
  pretty_name: Independent Implementation of SPLADE++ Model with some efficiency tweaks for Industry setting.
19
+ library_name: sentence-transformers
20
+ pipeline_tag: feature-extraction
21
  new_version: prithivida/Splade_PP_en_v2
22
  ---
23
 
 
202
  ["The Manhattan Project and its atomic bomb helped bring an end to World War II. Its legacy of peaceful uses of atomic energy continues to have an impact on history and science."])
203
  ```
204
 
205
+ ## 6c. With Sentence Transformers
206
+
207
+ First install the Sentence Transformers library:
208
+
209
+ ```bash
210
+ pip install -U sentence-transformers
211
+ ```
212
+
213
+ Then you can load this model and run inference.
214
+ ```python
215
+ from sentence_transformers import SparseEncoder
216
+
217
+ # Download from the 🤗 Hub
218
+ model = SparseEncoder("prithivida/Splade_PP_en_v1")
219
+
220
+ # Run inference
221
+ sentence = [
222
+ "The Manhattan Project and its atomic bomb helped bring an end to World War II. Its legacy of peaceful uses of atomic energy continues to have an impact on history and science."
223
+ ]
224
+ embeddings = model.encode(sentence)
225
+ print(embeddings.shape)
226
+ # [1, 30522]
227
+
228
+ decoded_sentence = model.decode(embeddings[0])
229
+ print(f"Number of actual dimensions: {len(decoded_sentence)}")
230
+ decoded_sentence_rounded = [(token, round(score, 2)) for token, score in decoded_sentence]
231
+ print("SPLADE BOW rep:\n", decoded_sentence_rounded)
232
+
233
+ # Number of actual dimensions: 112
234
+ # SPLADE BOW rep:
235
+ # [('manhattan', 2.32), ('atomic', 1.74), ('ny', 1.71), ('project', 1.7), ('1945', 1.65), ('war', 1.54), ('legacy', 1.52), ('peaceful', 1.49), ('bomb', 1.48), ('end', 1.19), ('helped', 1.09), ('impact', 1.07), ('bring', 1.06), ('energy', 1.05),
236
+ # ('ii', 1.02), ('was', 1.0), ('nuclear', 0.96), ('bringing', 0.96), ('purpose', 0.93), ('contribution', 0.88), ('history', 0.82), ('atom', 0.81), ('ended', 0.8), ('help', 0.79), ('use', 0.78), ('projects', 0.74), ('science', 0.73), ('york', 0.71),
237
+ # ('fought', 0.69), ('electricity', 0.64), ('wars', 0.6), ('used', 0.57), ('because', 0.55), ('assisted', 0.55), ('brought', 0.54), ('invented', 0.54), ('affect', 0.53), ('scientific', 0.51), ('heritage', 0.46), ('ending', 0.44), ('peace', 0.43),
238
+ # ('benefit', 0.41), ('aided', 0.41), ('holocaust', 0.4), ('happened', 0.4), ('power', 0.38), ('scientists', 0.38), ('1940s', 0.37), ('safe', 0.37), ('important', 0.37), ('effect', 0.37), ('1946', 0.36), ('supported', 0.36), ('motivation', 0.36),
239
+ # ('started', 0.35), ('invention', 0.34), ('explosion', 0.34), ('continued', 0.34), ('reason', 0.34), ('had', 0.33), ('goal', 0.32), ('descendant', 0.29), ('army', 0.28), ('impacts', 0.28), ('broadway', 0.27), ('mission', 0.27), ('radiation', 0.26),
240
+ # ('continue', 0.24), ('historical', 0.23), ('stalin', 0.22), ('usher', 0.22), ('served', 0.22), ('built', 0.22), ('bronx', 0.21), ('contributed', 0.19), ('advantage', 0.19), ('stop', 0.18), ('cause', 0.17), ('era', 0.16), ('bombs', 0.16), ('helping', 0.15),
241
+ # ('didn', 0.14), ('descendants', 0.14), ('gun', 0.13), ('roosevelt', 0.12), ('benefits', 0.12), ('importance', 0.11), ('1950', 0.11), ('death', 0.1), ('us', 0.1), ('wwii', 0.1), ('wrote', 0.1), ('beneficial', 0.09), ('experiment', 0.09), ('japan', 0.09),
242
+ # ('nazi', 0.08), ('headquarters', 0.07), ('fuel', 0.06), ('its', 0.05), ('influenced', 0.05), ('uses', 0.05), ('decisive', 0.05), ('radioactive', 0.05), ('justified', 0.04), ('worked', 0.04), ('achievements', 0.04), ('significance', 0.04), ('facilitated', 0.04),
243
+ # ('pioneer', 0.03), ('technology', 0.02), ('memorial', 0.01), ('sparked', 0.01)]
244
+
245
+ ```
246
 
247
+ ## 6d. With HuggingFace
248
 
249
  **NOTEBOOK user? Login first**
250
 
config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "SparseEncoder",
3
+ "__version__": {
4
+ "sentence_transformers": "5.0.0",
5
+ "transformers": "4.50.3",
6
+ "pytorch": "2.6.0+cu124"
7
+ },
8
+ "prompts": {
9
+ "query": "",
10
+ "document": ""
11
+ },
12
+ "default_prompt_name": null,
13
+ "similarity_fn_name": "dot"
14
+ }
modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.sparse_encoder.models.MLMTransformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_SpladePooling",
12
+ "type": "sentence_transformers.sparse_encoder.models.SpladePooling"
13
+ }
14
+ ]
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }