refactor: add docstrings
modeling_clip.py (+31 -3) CHANGED
@@ -391,7 +391,33 @@ class JinaCLIPModel(JinaCLIPPreTrainedModel):
         device: Optional[torch.device] = None,
         normalize_embeddings: bool = False,
     ) -> Union[List[torch.Tensor], np.ndarray, torch.Tensor]:
-
+        """
+        Computes image embeddings.
+
+        Args:
+            images(`str` or `List[str]`):
+                Image path or list of image paths to encode.
+            batch_size(`int`, *optional*, defaults to 32):
+                Batch size for the computation.
+            show_progress_bar(`bool`, *optional*, defaults to None):
+                Show a progress bar when encoding images.
+                If set to None, the progress bar is only shown when `logger.level == logging.INFO` or `logger.level == logging.DEBUG`.
+            convert_to_numpy(`bool`, *optional*, defaults to True):
+                If true, the output is a list of numpy vectors.
+                Else, it is a list of PyTorch tensors.
+            convert_to_tensor(`bool`, *optional*, defaults to False):
+                If true, a single stacked tensor is returned.
+                Overrides any setting from `convert_to_numpy`.
+            device(`torch.device`, *optional*, defaults to None):
+                Which `torch.device` to use for the computation.
+            normalize_embeddings(`bool`, *optional*, defaults to False):
+                If set to true, returned vectors will have length 1. In that case, the faster dot product (`util.dot_score`) can be used instead of cosine similarity.
+        Returns:
+            By default, a list of tensors is returned.
+            If `convert_to_tensor`, a stacked tensor is returned.
+            If `convert_to_numpy`, a numpy matrix is returned.
+        """
+        from PIL.Image import Image

         is_training = self.training
         self.eval()
@@ -422,17 +448,19 @@ class JinaCLIPModel(JinaCLIPPreTrainedModel):
         if has_tqdm:
             range_iter = trange(
                 0,
-                len(
+                len(images),
                 batch_size,
                 desc="Encoding",
                 disable=not show_progress_bar,
             )
         else:
-            range_iter = range(0, len(
+            range_iter = range(0, len(images), batch_size)

         for i in range_iter:
             processed_inputs = self.process([Image.open(image) for image in images])
             embeddings = self.get_image_features(processed_inputs)
+            if normalize_embeddings:
+                embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
             if convert_to_numpy:
                 embeddings = embeddings.cpu()
             all_embeddings.extend(embeddings)
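
For reference, a minimal usage sketch of the method this diff documents. The method name falls outside the hunk context shown; `encode_image`, the `jinaai/jina-clip-v1` checkpoint, and the image paths below are assumptions based on how the Jina CLIP model cards describe this API, not something the diff itself confirms:

import numpy as np
from transformers import AutoModel

# Assumption: the diffed method is JinaCLIPModel.encode_image, reached via
# AutoModel with trust_remote_code, as the Jina CLIP model cards describe.
model = AutoModel.from_pretrained('jinaai/jina-clip-v1', trust_remote_code=True)

embeddings = model.encode_image(
    ['photos/cat.png', 'photos/dog.png'],  # hypothetical image paths
    batch_size=32,
    show_progress_bar=True,
    convert_to_numpy=True,        # list of numpy vectors (the default)
    normalize_embeddings=True,    # exercises the new L2-normalization branch
)
print(np.asarray(embeddings).shape)  # e.g. (2, 768) for a 768-dim model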
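
The new `normalize_embeddings` branch applies plain L2 normalization, which is what backs the docstring's claim that a dot product (`util.dot_score`) can replace cosine similarity: on unit-length vectors the two are identical. A small self-contained check of that identity, independent of the model:

import torch
import torch.nn.functional as F

a, b = torch.randn(8, 512), torch.randn(8, 512)

# Same normalization the diff adds after get_image_features
a_n = F.normalize(a, p=2, dim=1)
b_n = F.normalize(b, p=2, dim=1)

dot = a_n @ b_n.T  # plain dot products between unit vectors
cos = F.cosine_similarity(a.unsqueeze(1), b.unsqueeze(0), dim=-1)
assert torch.allclose(dot, cos, atol=1e-5)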