Update README.md (#2)
Browse files- Update README.md (6d98da90112004186524194285f82d9bdc0a7115)
README.md
CHANGED
|
@@ -40,7 +40,6 @@ import itertools
|
|
| 40 |
import torch
|
| 41 |
|
| 42 |
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
| 43 |
-
from transformers.utils import cached_path,hf_bucket_url
|
| 44 |
|
| 45 |
|
| 46 |
# get sparse vector from dense vectors with shape batch_size * seq_len * vocab_size
|
|
@@ -67,8 +66,8 @@ def transform_sparse_vector_to_dict(sparse_vector):
|
|
| 67 |
|
| 68 |
# download the idf file from model hub. idf is used to give weights for query tokens
|
| 69 |
def get_tokenizer_idf(tokenizer):
|
| 70 |
-
|
| 71 |
-
local_cached_path = cached_path(hf_bucket_url("opensearch-project/opensearch-neural-sparse-encoding-doc-v1", filename="idf.json"))
|
| 72 |
with open(local_cached_path) as f:
|
| 73 |
idf = json.load(f)
|
| 74 |
idf_vector = [0]*tokenizer.vocab_size
|
|
|
|
| 40 |
import torch
|
| 41 |
|
| 42 |
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
|
|
|
| 43 |
|
| 44 |
|
| 45 |
# get sparse vector from dense vectors with shape batch_size * seq_len * vocab_size
|
|
|
|
| 66 |
|
| 67 |
# download the idf file from model hub. idf is used to give weights for query tokens
|
| 68 |
def get_tokenizer_idf(tokenizer):
|
| 69 |
+
from huggingface_hub import hf_hub_download
|
| 70 |
+
local_cached_path = hf_hub_download(repo_id="opensearch-project/opensearch-neural-sparse-encoding-doc-v1", filename="idf.json")
|
| 71 |
with open(local_cached_path) as f:
|
| 72 |
idf = json.load(f)
|
| 73 |
idf_vector = [0]*tokenizer.vocab_size
|