refactor: remove output_value from encode_text
modeling_clip.py  CHANGED  (+5 -15)
@@ -259,7 +259,6 @@ class JinaCLIPModel(JinaCLIPPreTrainedModel):
         sentences: Union[str, List[str]],
         batch_size: int = 32,
         show_progress_bar: Optional[bool] = None,
-        output_value: str = 'sentence_embedding',
         convert_to_numpy: bool = True,
         convert_to_tensor: bool = False,
         device: Optional[torch.device] = None,
@@ -276,10 +275,6 @@ class JinaCLIPModel(JinaCLIPPreTrainedModel):
            show_progress_bar(`bool`, *optional*, defaults to None):
                Show a progress bar when encoding sentences.
                If set to None, progress bar is only shown when `logger.level == logging.INFO` or `logger.level == logging.DEBUG`.
-           output_value(`str`, *optional*, defaults to 'sentence_embedding'):
-               Default sentence_embedding, to get sentence embeddings.
-               Can be set to token_embeddings to get wordpiece token embeddings.
-               Set to None, to get all output values
            convert_to_numpy(`bool`, *optional*, defaults to True):
                If true, the output is a list of numpy vectors.
                Else, it is a list of pytorch tensors.
@@ -349,16 +344,11 @@ class JinaCLIPModel(JinaCLIPPreTrainedModel):
                **tokenizer_kwargs,
            ).to(self.device)

-
-
-
-
-
-           embeddings = self.get_text_features(input_ids=encoded_input)
-           if normalize_embeddings:
-               embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
-           if convert_to_numpy:
-               embeddings = embeddings.cpu()
+           embeddings = self.get_text_features(input_ids=encoded_input)
+           if normalize_embeddings:
+               embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
+           if convert_to_numpy:
+               embeddings = embeddings.cpu()
            all_embeddings.extend(embeddings)

        all_embeddings = [all_embeddings[idx] for idx in inverse_permutation]
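For context, a minimal usage sketch of `encode_text` after this refactor. The parameter names (`sentences`, `batch_size`, `convert_to_numpy`, `normalize_embeddings`) and the removal of `output_value` come from the diff above; loading the model via `AutoModel.from_pretrained(..., trust_remote_code=True)` and the repository id `jinaai/jina-clip-v1` are assumptions not confirmed by this commit, so adjust them to your setup.

# Hedged usage sketch, not part of this commit.
# Assumption: the model lives at "jinaai/jina-clip-v1" and is loaded with
# trust_remote_code=True so this modeling_clip.py is used.
import numpy as np
from transformers import AutoModel

model = AutoModel.from_pretrained('jinaai/jina-clip-v1', trust_remote_code=True)

sentences = ['a photo of a cat', 'a photo of a dog']

# After this refactor, encode_text no longer accepts output_value; it always
# returns sentence-level embeddings computed via get_text_features.
embeddings = model.encode_text(
    sentences,
    batch_size=32,
    convert_to_numpy=True,  # per the docstring, output becomes numpy vectors
)

# Normalize manually so the similarity does not depend on the
# normalize_embeddings flag used inside encode_text.
a, b = (v / np.linalg.norm(v) for v in embeddings[:2])
print(float(a @ b))

Passing output_value to encode_text after this change would raise a TypeError, since the keyword argument no longer exists.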