---
# For reference on model card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
# Doc / guide: https://huggingface.co/docs/hub/model-cards
tags:
- FlagEmbedding
- Embedding
- Hybrid Retrieval
- ONNX
- Optimum
- ONNXRuntime
- Multilingual
license: mit
base_model: BAAI/bge-m3
---

+
# Model Card for philipchung/bge-m3-onnx
|
| 17 |
+
|
| 18 |
+
<!-- Provide a quick summary of what the model is/does. -->
|
| 19 |
+
|
| 20 |
+
This is the [BAAI/BGE-M3](https://huggingface.co/BAAI/bge-m3) inference model converted to ONNX format and can be used with Optimum ONNX Runtime with CPU acceleration. This model outputs all 3 embedding types (Dense, Sparse, ColBERT).
|
| 21 |
+
|
| 22 |
+
No ONNX optimizations are applied to this model. If you want to apply optimizations, use the export script included in this repo to generate a version of ONNX model with optimizations.
|
| 23 |
+
|
| 24 |
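For reference, graph optimizations can also be applied after the fact with Optimum's generic `ORTOptimizer`. The sketch below is illustrative only: the save directory name is a placeholder, and the generic optimizer is not guaranteed to handle the custom BGE-M3 output heads the way the repo's export script does.

```python
from optimum.onnxruntime import ORTModelForCustomTasks, ORTOptimizer
from optimum.onnxruntime.configuration import OptimizationConfig

# Load the unoptimized ONNX model from the Hub
model = ORTModelForCustomTasks.from_pretrained("philipchung/bge-m3-onnx")

# Apply conservative ONNX Runtime graph optimizations and save the result.
# optimization_level=1 enables only basic, hardware-independent fusions.
optimizer = ORTOptimizer.from_pretrained(model)
optimizer.optimize(
    save_dir="bge-m3-onnx-optimized",  # placeholder output directory
    optimization_config=OptimizationConfig(optimization_level=1),
)
```
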
+
Some of the code is adapted from [aapot/bge-m3-onnx](https://huggingface.co/aapot/bge-m3-onnx). The model in this repo inherits from `PretrainedModel` and the ONNX model can be downloaded from Huggingface Hub and used directly with the `model.from_pretrained()` method.
|
| 25 |
+
|
| 26 |
+
## How to Use
|
| 27 |
+
|
| 28 |
+
```python
|
| 29 |
+
from collections import defaultdict
|
| 30 |
+
from typing import Any
|
| 31 |
+
|
| 32 |
+
import numpy as np
|
| 33 |
+
from optimum.onnxruntime import ORTModelForCustomTasks
|
| 34 |
+
from transformers import AutoTokenizer
|
| 35 |
+
|
| 36 |
+
# Download ONNX model from Huggingface Hub
|
| 37 |
+
onnx_model = ORTModelForCustomTasks.from_pretrained("philipchung/bge-m3-onnx")
|
| 38 |
+
tokenizer = AutoTokenizer.from_pretrained("philipchung/bge-m3-onnx")
|
| 39 |
+
# Inference forward pass
|
| 40 |
+
sentences = ["First test sentence.", "Second test sentence"]
|
| 41 |
+
inputs = tokenizer(
|
| 42 |
+
sentences,
|
| 43 |
+
padding="longest",
|
| 44 |
+
return_tensors="np",
|
| 45 |
+
)
|
| 46 |
+
outputs = onnx_model.forward(**inputs)
|
| 47 |
+
|
| 48 |
+
def process_token_weights(
|
| 49 |
+
token_weights: np.ndarray, input_ids: list
|
| 50 |
+
) -> defaultdict[Any, int]:
|
| 51 |
+
"""Convert sparse token weights into dictionary of token indices and corresponding weights.
|
| 52 |
+
|
| 53 |
+
Function is taken from the original FlagEmbedding.bge_m3.BGEM3FlagModel from the
|
| 54 |
+
_process_token_weights() function defined within the encode() method.
|
| 55 |
+
"""
|
| 56 |
+
# convert to dict
|
| 57 |
+
result = defaultdict(int)
|
| 58 |
+
unused_tokens = set(
|
| 59 |
+
[
|
| 60 |
+
tokenizer.cls_token_id,
|
| 61 |
+
tokenizer.eos_token_id,
|
| 62 |
+
tokenizer.pad_token_id,
|
| 63 |
+
tokenizer.unk_token_id,
|
| 64 |
+
]
|
| 65 |
+
)
|
| 66 |
+
for w, idx in zip(token_weights, input_ids, strict=False):
|
| 67 |
+
if idx not in unused_tokens and w > 0:
|
| 68 |
+
idx = str(idx)
|
| 69 |
+
# w = int(w)
|
| 70 |
+
if w > result[idx]:
|
| 71 |
+
result[idx] = w
|
| 72 |
+
return result
|
| 73 |
+
|
| 74 |
+
# Each sentence results in a dict[str, list]float] | dict[str, float] | list[list[float]]] which corresponds to a dict with dense, sparse, and colbert embeddings.
|
| 75 |
+
embeddings_list = []
|
| 76 |
+
for input_ids, dense_vec, sparse_vec, colbert_vec in zip(
|
| 77 |
+
inputs["input_ids"],
|
| 78 |
+
outputs["dense_vecs"],
|
| 79 |
+
outputs["sparse_vecs"],
|
| 80 |
+
outputs["colbert_vecs"],
|
| 81 |
+
strict=False,
|
| 82 |
+
):
|
| 83 |
+
# Convert token weights into dictionary of token indices and corresponding weights
|
| 84 |
+
token_weights = sparse_vec.astype(float).squeeze(-1)
|
| 85 |
+
sparse_embeddings = process_token_weights(
|
| 86 |
+
token_weights,
|
| 87 |
+
input_ids.tolist(),
|
| 88 |
+
)
|
| 89 |
+
multivector_embedding = {
|
| 90 |
+
"dense": dense_vec.astype(float).tolist(), # (1024)
|
| 91 |
+
"sparse": dict(sparse_embeddings), # dict[token_index, weight]
|
| 92 |
+
"colbert": colbert_vec.astype(float).tolist(), # (token len, 1024)
|
| 93 |
+
}
|
| 94 |
+
embeddings_list.append(multivector_embedding)
|
| 95 |
+
```
|