refactor: add docstrings
modeling_clip.py (+31 -3) CHANGED
@@ -391,7 +391,33 @@ class JinaCLIPModel(JinaCLIPPreTrainedModel):
         device: Optional[torch.device] = None,
         normalize_embeddings: bool = False,
     ) -> Union[List[torch.Tensor], np.ndarray, torch.Tensor]:
-
+        """
+        Computes image embeddings.
+
+        Args:
+            images(`str` or `List[str]`):
+                Image path or list of image paths to encode.
+            batch_size(`int`, *optional*, defaults to 32):
+                Batch size for the computation.
+            show_progress_bar(`bool`, *optional*, defaults to None):
+                Show a progress bar when encoding images.
+                If set to None, the progress bar is only shown when `logger.level == logging.INFO` or `logger.level == logging.DEBUG`.
+            convert_to_numpy(`bool`, *optional*, defaults to True):
+                If true, the output is a list of numpy vectors.
+                Else, it is a list of PyTorch tensors.
+            convert_to_tensor(`bool`, *optional*, defaults to False):
+                If true, a single stacked tensor is returned.
+                Overrides any setting from `convert_to_numpy`.
+            device(`torch.device`, *optional*, defaults to None):
+                Which `torch.device` to use for the computation.
+            normalize_embeddings(`bool`, *optional*, defaults to False):
+                If set to true, returned vectors will have length 1. In that case, the faster dot product (`util.dot_score`) can be used instead of cosine similarity.
+        Returns:
+            By default, a list of tensors is returned.
+            If `convert_to_tensor`, a stacked tensor is returned.
+            If `convert_to_numpy`, a numpy matrix is returned.
+        """
+        from PIL.Image import Image

         is_training = self.training
         self.eval()
@@ -422,17 +448,19 @@ class JinaCLIPModel(JinaCLIPPreTrainedModel):
         if has_tqdm:
             range_iter = trange(
                 0,
-                len(
+                len(images),
                 batch_size,
                 desc="Encoding",
                 disable=not show_progress_bar,
             )
         else:
-            range_iter = range(0, len(
+            range_iter = range(0, len(images), batch_size)

         for i in range_iter:
             processed_inputs = self.process([Image.open(image) for image in images])
             embeddings = self.get_image_features(processed_inputs)
+            if normalize_embeddings:
+                embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
             if convert_to_numpy:
                 embeddings = embeddings.cpu()
             all_embeddings.extend(embeddings)
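
For reference, a minimal usage sketch of the method this diff documents. The method name falls outside the hunk context shown; `encode_image`, the `jinaai/jina-clip-v1` checkpoint, and the image paths below are assumptions based on how the Jina CLIP model cards describe this API, not something the diff itself confirms:

import numpy as np
from transformers import AutoModel

# Assumption: the diffed method is JinaCLIPModel.encode_image, reached via
# AutoModel with trust_remote_code, as the Jina CLIP model cards describe.
model = AutoModel.from_pretrained('jinaai/jina-clip-v1', trust_remote_code=True)

embeddings = model.encode_image(
    ['photos/cat.png', 'photos/dog.png'],  # hypothetical image paths
    batch_size=32,
    show_progress_bar=True,
    convert_to_numpy=True,        # list of numpy vectors (the default)
    normalize_embeddings=True,    # exercises the new L2-normalization branch
)
print(np.asarray(embeddings).shape)  # e.g. (2, 768) for a 768-dim model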
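
The new `normalize_embeddings` branch applies plain L2 normalization, which is what backs the docstring's claim that a dot product (`util.dot_score`) can replace cosine similarity: on unit-length vectors the two are identical. A small self-contained check of that identity, independent of the model:

import torch
import torch.nn.functional as F

a, b = torch.randn(8, 512), torch.randn(8, 512)

# Same normalization the diff adds after get_image_features
a_n = F.normalize(a, p=2, dim=1)
b_n = F.normalize(b, p=2, dim=1)

dot = a_n @ b_n.T  # plain dot products between unit vectors
cos = F.cosine_similarity(a.unsqueeze(1), b.unsqueeze(0), dim=-1)
assert torch.allclose(dot, cos, atol=1e-5)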