---
license: gemma
---
## Usage

The snippet below downloads both ONNX towers from the Hub, runs them with ONNX Runtime, and scores the queries against the images with `ColPaliProcessor.score_retrieval`.
```python
import os

import onnxruntime as ort
import torch
from huggingface_hub import snapshot_download
from PIL import Image
from transformers import ColPaliProcessor

MODEL_IMAGE_PATH = "ssonpull519/colpali-v1.3-hf-image-onnx-fp16"
MODEL_TEXT_PATH = "ssonpull519/colpali-v1.3-hf-text-onnx-fp16"

processor = ColPaliProcessor.from_pretrained(MODEL_IMAGE_PATH)

# Your inputs
images = [
    Image.open("image1.png"),
    Image.open("image2.png"),
]
queries = [
    "Who printed the edition of Romeo and Juliet?",
    "When was the United States Declaration of Independence proclaimed?",
]

# Process the inputs.
# Images: ['input_ids', 'attention_mask', 'pixel_values']; (B, 1030), (B, 3, 448, 448);
# input_ids are mostly <image> tokens plus a text prefix.
batch_images = processor(images=images, return_tensors="pt")
# Queries: ['input_ids', 'attention_mask']; (B, S)
batch_queries = processor(text=queries, return_tensors="pt")

# Convert the inputs to numpy arrays for the ONNX models.
# NOTE: if the fp16 graphs expect half-precision floats, cast here, e.g.
# inputs_images_onnx["pixel_values"] = inputs_images_onnx["pixel_values"].astype("float16")
inputs_images_onnx = {name: tensor.numpy() for name, tensor in batch_images.items()}
inputs_queries_onnx = {name: tensor.numpy() for name, tensor in batch_queries.items()}

# Fetch the ONNX repos from the Hub and run both models
# (on GPU when the CUDA execution provider is available)
providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
image_dir = snapshot_download(MODEL_IMAGE_PATH)
text_dir = snapshot_download(MODEL_TEXT_PATH)
sess_image = ort.InferenceSession(os.path.join(image_dir, "model.onnx"), providers=providers)
sess_text = ort.InferenceSession(os.path.join(text_dir, "model.onnx"), providers=providers)
onnx_output_images = sess_image.run(None, inputs_images_onnx)
onnx_output_queries = sess_text.run(None, inputs_queries_onnx)

# Score the queries against the images: (Bt, Bi, S, 1030) -> (Bt, Bi).
# Cast to fp32 so the MaxSim scoring runs reliably on CPU.
scores = processor.score_retrieval(
    torch.from_numpy(onnx_output_queries[0]).float(),
    torch.from_numpy(onnx_output_images[0]).float(),
)

print("onnx_output size [images]:", onnx_output_images[0].shape)
print("onnx_output size [queries]:", onnx_output_queries[0].shape)
print("scores:")
print(scores)
```
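Once you have `scores` (one row per query, one column per image), ranking is a single sort. A minimal sketch using the variables from the snippet above:

```python
# Rank images for each query by descending late-interaction score
ranked = scores.argsort(dim=1, descending=True)  # (Bt, Bi)

for qi, query in enumerate(queries):
    best = ranked[qi, 0].item()
    print(f"query {qi} ({query!r}): best image index {best}, score {scores[qi, best].item():.2f}")
```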
## ONNX Conversion Script
Exporter support for ColPali is currently in a [pull request](https://github.com/huggingface/optimum/pull/2251) to Optimum (not merged yet). Use `--variant vision` for the image tower and `--variant text` for the query tower:
```bash
optimum-cli export onnx --model vidore/colpali-v1.3-hf ./onnx_output --task feature-extraction --variant text --dtype fp16
```
For fp16 export, there is a known issue in transformers that has not been fixed yet, so use the script below instead:
```python
from pathlib import Path

import onnx
import torch
from optimum.exporters import TasksManager
from optimum.exporters.onnx import export, validate_model_outputs
from transformers import ColPaliForRetrieval

MODEL_PATH = "vidore/colpali-v1.3-hf"
VARIANT = "text"  # one of "vision" or "text"
ONNX_PATH = f"onnx/{VARIANT}/model.onnx"
MODEL_DTYPE = torch.float16  # one of torch.float32 or torch.float16

# Load the PyTorch model and cast it to the target dtype
base_model = ColPaliForRetrieval.from_pretrained(MODEL_PATH)
base_model = base_model.to(dtype=MODEL_DTYPE)

onnx_path = Path(ONNX_PATH)
onnx_path.parent.mkdir(parents=True, exist_ok=True)

# Build the ONNX export config for the chosen variant
onnx_config_constructor = TasksManager.get_exporter_config_constructor("onnx", base_model)
onnx_config = onnx_config_constructor(base_model.config)
onnx_config.variant = VARIANT

onnx_inputs, onnx_outputs = export(base_model, onnx_config, onnx_path, onnx_config.DEFAULT_ONNX_OPSET)

# -- validate the exported model --
onnx.checker.check_model(ONNX_PATH)  # pass the path so external weight files are resolved
validate_model_outputs(
    onnx_config, base_model, onnx_path, ["embeddings"], onnx_config.ATOL_FOR_VALIDATION, use_subprocess=False
)
```
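Once exported, a quick sanity check is to open the graph with ONNX Runtime and list its inputs and outputs (a minimal sketch, assuming the `onnx/text/model.onnx` path from the script above):

```python
import onnxruntime as ort

sess = ort.InferenceSession("onnx/text/model.onnx", providers=["CPUExecutionProvider"])

# Expect input_ids / attention_mask (plus pixel_values for the vision variant)
# and a single "embeddings" output
for inp in sess.get_inputs():
    print("input:", inp.name, inp.shape, inp.type)
for out in sess.get_outputs():
    print("output:", out.name, out.shape, out.type)
```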