---
license: gemma
---

ONNX export of [vidore/colpali-v1.3-hf](https://huggingface.co/vidore/colpali-v1.3-hf). The image and text parts of the model are exported as two separate ONNX models.

## Usage

```python
import onnxruntime as ort
import os
import torch
from PIL import Image
from transformers import ColPaliProcessor


MODEL_IMAGE_PATH = "ssonpull519/colpali-v1.3-hf-image-onnx-fp32"
MODEL_TEXT_PATH = "ssonpull519/colpali-v1.3-hf-text-onnx-fp32"

device = "cuda"

processor = ColPaliProcessor.from_pretrained(MODEL_IMAGE_PATH)

# Your inputs
images = [
    Image.open("image1.png"),
    Image.open("image2.png"),
]
queries = [
    "Who printed the edition of Romeo and Juliet?",
    "When was the United States Declaration of Independence proclaimed?",
]

# Process the inputs
batch_images = processor(images=images, return_tensors="pt")  # ['input_ids', 'attention_mask', 'pixel_values']; (B, 1030), (B, 3, 448, 448); input_ids are the <image> tokens plus the prefix.
batch_queries = processor(text=queries, return_tensors="pt")  # ['input_ids', 'attention_mask']; (B, S)

# Move inputs to GPU
batch_images = batch_images.to(device)
batch_queries = batch_queries.to(device)

# Convert the inputs to numpy arrays for the ONNX models
inputs_images_onnx = {name: tensor.cpu().numpy() for name, tensor in batch_images.items()}
inputs_queries_onnx = {name: tensor.cpu().numpy() for name, tensor in batch_queries.items()}

# Run the ONNX models
sess_image = ort.InferenceSession(os.path.join(MODEL_IMAGE_PATH, "model.onnx"))
sess_text = ort.InferenceSession(os.path.join(MODEL_TEXT_PATH, "model.onnx"))

onnx_output_images = sess_image.run(None, inputs_images_onnx)
onnx_output_queries = sess_text.run(None, inputs_queries_onnx)

# Score the queries against the images
scores = processor.score_retrieval(torch.Tensor(onnx_output_queries[0]), torch.Tensor(onnx_output_images[0]))  # (Bt, Bi, S, 1030) -> (Bt, Bi)

print("onnx_output size [images]:", onnx_output_images[0].shape)
print("onnx_output size [queries]:", onnx_output_queries[0].shape)

print("scores:")
print(scores)
```
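
Note that the snippet uses `MODEL_IMAGE_PATH` and `MODEL_TEXT_PATH` both as Hub repo ids (for the processor) and as local directories containing `model.onnx` (for the ONNX Runtime sessions), so it assumes the two repositories are already available locally. If they are not, one way to fetch them is `huggingface_hub.snapshot_download`; and since `ort.InferenceSession` defaults to CPU execution, you can pass the CUDA execution provider explicitly to run on GPU (requires `onnxruntime-gpu`). A minimal sketch, reusing the constants from the snippet above:

```python
import os

import onnxruntime as ort
from huggingface_hub import snapshot_download

# Download the repos (model.onnx plus any external weight files) into the local HF cache.
image_dir = snapshot_download(MODEL_IMAGE_PATH)
text_dir = snapshot_download(MODEL_TEXT_PATH)

# Prefer the CUDA provider when available, falling back to CPU.
providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
sess_image = ort.InferenceSession(os.path.join(image_dir, "model.onnx"), providers=providers)
sess_text = ort.InferenceSession(os.path.join(text_dir, "model.onnx"), providers=providers)
```

Either way, the numpy inputs can be fed to `run()` directly; the `.to(device)` round trip in the example above is not required by ONNX Runtime.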

## ONNX Conversion Script

To export with `optimum-cli`, `ColPaliForRetrieval` in transformers would need to be mapped to an `AutoModel` class, which it currently is not, so I stuck with the script below. It is based on the official Hugging Face [guide](https://huggingface.co/docs/optimum/en/exporters/onnx/usage_guides/contribute#exporting-the-model) on exporting a model to ONNX.

```python
from pathlib import Path
from optimum.exporters import TasksManager
from optimum.exporters.onnx import export
from transformers import ColPaliForRetrieval
import torch


MODEL_PATH = "vidore/colpali-v1.3-hf"
VARIANT = "vision"  # one of "vision" or "text"
ONNX_PATH = f"onnx/{VARIANT}/model.onnx"
MODEL_DTYPE = torch.float32  # one of torch.float32 or torch.float16

base_model = ColPaliForRetrieval.from_pretrained(MODEL_PATH)
base_model = base_model.to(dtype=MODEL_DTYPE)

onnx_path = Path(ONNX_PATH)

onnx_config_constructor = TasksManager.get_exporter_config_constructor("onnx", base_model)
onnx_config = onnx_config_constructor(base_model.config)
onnx_config.variant = VARIANT
onnx_inputs, onnx_outputs = export(base_model, onnx_config, onnx_path, onnx_config.DEFAULT_ONNX_OPSET)

# -- Validate the exported model --
import onnx

onnx_model = onnx.load(ONNX_PATH)
onnx.checker.check_model(ONNX_PATH)

from optimum.exporters.onnx import validate_model_outputs

validate_model_outputs(
    onnx_config, base_model, onnx_path, ["embeddings"], onnx_config.ATOL_FOR_VALIDATION, use_subprocess=False
)
```
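
After exporting, a quick sanity check is to list the input and output names the ONNX graph actually exposes (the exact names come from the exporter config, but the outputs should include the `embeddings` tensor validated above). A minimal sketch, assuming the export path from the script:

```python
import onnxruntime as ort

sess = ort.InferenceSession("onnx/vision/model.onnx")  # or "onnx/text/model.onnx"
print("inputs: ", [(i.name, i.shape) for i in sess.get_inputs()])
print("outputs:", [(o.name, o.shape) for o in sess.get_outputs()])
```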