Add new SentenceTransformer model with an onnx backend
Hello!
*This pull request has been automatically generated from the [`push_to_hub`](https://sbert.net/docs/package_reference/sentence_transformer/SentenceTransformer.html#sentence_transformers.SentenceTransformer.push_to_hub) method from the Sentence Transformers library.*
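For context, a PR like this is usually produced by loading the model with the ONNX backend (which triggers the export) and pushing the result back to the Hub. A minimal sketch of that flow, assuming a recent Sentence Transformers version where `push_to_hub` accepts `create_pr`:
```python
from sentence_transformers import SentenceTransformer

# Loading with backend="onnx" exports the model to ONNX if the repository
# does not already contain an ONNX export.
model = SentenceTransformer("redis/langcache-embed-v1", backend="onnx")

# Push the exported files back to the repository as a pull request rather
# than committing directly to the main branch.
model.push_to_hub("redis/langcache-embed-v1", create_pr=True)
```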
## Full Model Architecture:
```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 8192, 'do_lower_case': False}) with Transformer model: ORTModelForFeatureExtraction 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)
```
## Tip:
Consider testing this pull request before merging by loading the model from this PR with the `revision` argument:
```python
from sentence_transformers import SentenceTransformer
# TODO: Fill in the PR number
pr_number = 2
model = SentenceTransformer(
    "redis/langcache-embed-v1",
    revision=f"refs/pr/{pr_number}",
    backend="onnx",
)
# Verify that everything works as expected
embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
print(embeddings.shape)
similarities = model.similarity(embeddings, embeddings)
print(similarities)
```
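Once this PR is merged, the `revision` argument is no longer needed; the ONNX backend can be selected directly:
```python
from sentence_transformers import SentenceTransformer

# After merging, the ONNX export is available on the main branch.
model = SentenceTransformer("redis/langcache-embed-v1", backend="onnx")
```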
- config.json +1 -3
- config_sentence_transformers.json +3 -3
- onnx/model.onnx +3 -0
- tokenizer_config.json +8 -1

```diff
--- a/config.json
+++ b/config.json
@@ -1,5 +1,4 @@
 {
-  "_name_or_path": "Alibaba-NLP/gte-modernbert-base",
   "architectures": [
     "ModernBertModel"
   ],
@@ -36,12 +35,11 @@
   "num_hidden_layers": 22,
   "pad_token_id": 50283,
   "position_embedding_type": "absolute",
-  "reference_compile": true,
   "repad_logits_with_grad": false,
   "sep_token_id": 50282,
   "sparse_pred_ignore_index": -100,
   "sparse_prediction": false,
   "torch_dtype": "float32",
-  "transformers_version": "4.
+  "transformers_version": "4.51.3",
   "vocab_size": 50368
 }
```

```diff
--- a/config_sentence_transformers.json
+++ b/config_sentence_transformers.json
@@ -1,8 +1,8 @@
 {
   "__version__": {
-    "sentence_transformers": "
-    "transformers": "4.
-    "pytorch": "2.
+    "sentence_transformers": "4.1.0",
+    "transformers": "4.51.3",
+    "pytorch": "2.6.0+cu124"
   },
   "prompts": {},
   "default_prompt_name": null,
```

```diff
--- /dev/null
+++ b/onnx/model.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:935ca87f24ea1f7374313cc6498726316214e24aafd14bf4e05bb8d2c31c5150
+size 596472567
```
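The `onnx/model.onnx` entry above is a Git LFS pointer; the actual weights (~596 MB) live in LFS rather than in the diff. To inspect the exported file from this PR directly, a sketch using `huggingface_hub` (the `refs/pr/2` revision matches the tip above):
```python
import os

from huggingface_hub import hf_hub_download

# Download the ONNX weights from the PR revision.
path = hf_hub_download(
    repo_id="redis/langcache-embed-v1",
    filename="onnx/model.onnx",
    revision="refs/pr/2",
)

# The file size should match the LFS pointer above (596472567 bytes).
print(path, os.path.getsize(path))
```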

```diff
--- a/tokenizer_config.json
+++ b/tokenizer_config.json
@@ -933,13 +933,20 @@
   "cls_token": "[CLS]",
   "extra_special_tokens": {},
   "mask_token": "[MASK]",
+  "max_length": 8192,
   "model_input_names": [
     "input_ids",
     "attention_mask"
   ],
-  "model_max_length": 
+  "model_max_length": 8192,
+  "pad_to_multiple_of": null,
   "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
   "sep_token": "[SEP]",
+  "stride": 0,
   "tokenizer_class": "PreTrainedTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
   "unk_token": "[UNK]"
 }
```
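The new tokenizer settings pin the maximum sequence length to 8192 tokens, matching the `max_seq_length` shown in the architecture above. A quick sanity check after loading the model from this PR (a sketch reusing the snippet from the tip):
```python
from sentence_transformers import SentenceTransformer

# Load from the PR revision and confirm the 8192-token limit.
model = SentenceTransformer(
    "redis/langcache-embed-v1",
    revision="refs/pr/2",
    backend="onnx",
)
print(model.max_seq_length)              # expected: 8192
print(model.tokenizer.model_max_length)  # expected: 8192
```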
