Add new SentenceTransformer model
Browse files
- README.md +94 -90
- config.json +1 -1
- model.safetensors +2 -2
    	
        README.md
    CHANGED
    
    | @@ -16,50 +16,54 @@ tags: | |
| 16 | 
             
            - loss:CoSENTLoss
         | 
| 17 | 
             
            base_model: Alibaba-NLP/gte-modernbert-base
         | 
| 18 | 
             
            widget:
         | 
| 19 | 
            -
            - source_sentence:  | 
| 20 | 
            -
                 | 
| 21 | 
             
              sentences:
         | 
| 22 | 
            -
              -  | 
| 23 | 
            -
                 | 
| 24 | 
            -
              -  | 
| 25 | 
            -
                 | 
| 26 | 
            -
              -  | 
| 27 | 
            -
                 | 
| 28 | 
            -
            - source_sentence:  | 
| 29 | 
            -
                 | 
| 30 | 
             
              sentences:
         | 
| 31 | 
            -
              -  | 
| 32 | 
            -
                 | 
| 33 | 
            -
              -  | 
| 34 | 
            -
                 | 
| 35 | 
            -
              -  | 
| 36 | 
            -
                 | 
| 37 | 
            -
             | 
| 38 | 
            -
             | 
|  | |
| 39 | 
             
              sentences:
         | 
| 40 | 
            -
              -  | 
| 41 | 
            -
                 | 
| 42 | 
            -
              -  | 
| 43 | 
            -
             | 
| 44 | 
            -
             | 
| 45 | 
            -
             | 
| 46 | 
            -
             | 
|  | |
| 47 | 
             
              sentences:
         | 
| 48 | 
            -
              -  | 
| 49 | 
            -
             | 
| 50 | 
            -
             | 
| 51 | 
            -
             | 
| 52 | 
            -
             | 
| 53 | 
            -
             | 
| 54 | 
            -
             | 
| 55 | 
            -
                 | 
| 56 | 
             
              sentences:
         | 
| 57 | 
            -
              -  | 
| 58 | 
            -
                 | 
| 59 | 
            -
             | 
| 60 | 
            -
             | 
| 61 | 
            -
             | 
| 62 | 
            -
                 | 
|  | |
|  | |
| 63 | 
             
            datasets:
         | 
| 64 | 
             
            - redis/langcache-sentencepairs-v1
         | 
| 65 | 
             
            pipeline_tag: sentence-similarity
         | 
| @@ -84,28 +88,28 @@ model-index: | |
| 84 | 
             
                  type: val
         | 
| 85 | 
             
                metrics:
         | 
| 86 | 
             
                - type: cosine_accuracy
         | 
| 87 | 
            -
                  value: 0. | 
| 88 | 
             
                  name: Cosine Accuracy
         | 
| 89 | 
             
                - type: cosine_accuracy_threshold
         | 
| 90 | 
            -
                  value: 0. | 
| 91 | 
             
                  name: Cosine Accuracy Threshold
         | 
| 92 | 
             
                - type: cosine_f1
         | 
| 93 | 
            -
                  value: 0. | 
| 94 | 
             
                  name: Cosine F1
         | 
| 95 | 
             
                - type: cosine_f1_threshold
         | 
| 96 | 
            -
                  value: 0. | 
| 97 | 
             
                  name: Cosine F1 Threshold
         | 
| 98 | 
             
                - type: cosine_precision
         | 
| 99 | 
            -
                  value: 0. | 
| 100 | 
             
                  name: Cosine Precision
         | 
| 101 | 
             
                - type: cosine_recall
         | 
| 102 | 
            -
                  value: 0. | 
| 103 | 
             
                  name: Cosine Recall
         | 
| 104 | 
             
                - type: cosine_ap
         | 
| 105 | 
            -
                  value: 0. | 
| 106 | 
             
                  name: Cosine Ap
         | 
| 107 | 
             
                - type: cosine_mcc
         | 
| 108 | 
            -
                  value: 0. | 
| 109 | 
             
                  name: Cosine Mcc
         | 
| 110 | 
             
              - task:
         | 
| 111 | 
             
                  type: binary-classification
         | 
| @@ -115,28 +119,28 @@ model-index: | |
| 115 | 
             
                  type: test
         | 
| 116 | 
             
                metrics:
         | 
| 117 | 
             
                - type: cosine_accuracy
         | 
| 118 | 
            -
                  value: 0. | 
| 119 | 
             
                  name: Cosine Accuracy
         | 
| 120 | 
             
                - type: cosine_accuracy_threshold
         | 
| 121 | 
            -
                  value: 0. | 
| 122 | 
             
                  name: Cosine Accuracy Threshold
         | 
| 123 | 
             
                - type: cosine_f1
         | 
| 124 | 
            -
                  value: 0. | 
| 125 | 
             
                  name: Cosine F1
         | 
| 126 | 
             
                - type: cosine_f1_threshold
         | 
| 127 | 
            -
                  value: 0. | 
| 128 | 
             
                  name: Cosine F1 Threshold
         | 
| 129 | 
             
                - type: cosine_precision
         | 
| 130 | 
            -
                  value: 0. | 
| 131 | 
             
                  name: Cosine Precision
         | 
| 132 | 
             
                - type: cosine_recall
         | 
| 133 | 
            -
                  value: 0. | 
| 134 | 
             
                  name: Cosine Recall
         | 
| 135 | 
             
                - type: cosine_ap
         | 
| 136 | 
            -
                  value: 0. | 
| 137 | 
             
                  name: Cosine Ap
         | 
| 138 | 
             
                - type: cosine_mcc
         | 
| 139 | 
            -
                  value: 0. | 
| 140 | 
             
                  name: Cosine Mcc
         | 
| 141 | 
             
            ---
         | 
| 142 |  | 
| @@ -190,9 +194,9 @@ from sentence_transformers import SentenceTransformer | |
| 190 | 
             
            model = SentenceTransformer("redis/langcache-embed-v3")
         | 
| 191 | 
             
            # Run inference
         | 
| 192 | 
             
            sentences = [
         | 
| 193 | 
            -
                ' | 
| 194 | 
            -
                ' | 
| 195 | 
            -
                ' | 
| 196 | 
             
            ]
         | 
| 197 | 
             
            embeddings = model.encode(sentences)
         | 
| 198 | 
             
            print(embeddings.shape)
         | 
| @@ -201,9 +205,9 @@ print(embeddings.shape) | |
| 201 | 
             
            # Get the similarity scores for the embeddings
         | 
| 202 | 
             
            similarities = model.similarity(embeddings, embeddings)
         | 
| 203 | 
             
            print(similarities)
         | 
| 204 | 
            -
            # tensor([[1. | 
| 205 | 
            -
            #         [0. | 
| 206 | 
            -
            #         [0. | 
| 207 | 
             
            ```
         | 
| 208 |  | 
| 209 | 
             
            <!--
         | 
| @@ -241,14 +245,14 @@ You can finetune this model on your own dataset. | |
| 241 |  | 
| 242 | 
             
            | Metric                    | val        | test       |
         | 
| 243 | 
             
            |:--------------------------|:-----------|:-----------|
         | 
| 244 | 
            -
            | cosine_accuracy           | 0. | 
| 245 | 
            -
            | cosine_accuracy_threshold | 0.8641     | 0. | 
| 246 | 
            -
            | cosine_f1                 | 0. | 
| 247 | 
            -
            | cosine_f1_threshold       | 0. | 
| 248 | 
            -
            | cosine_precision          | 0.6289     | 0. | 
| 249 | 
            -
            | cosine_recall             | 0. | 
| 250 | 
            -
            | **cosine_ap**             | **0. | 
| 251 | 
            -
            | cosine_mcc                | 0. | 
| 252 |  | 
| 253 | 
             
            <!--
         | 
| 254 | 
             
            ## Bias, Risks and Limitations
         | 
| @@ -269,19 +273,19 @@ You can finetune this model on your own dataset. | |
| 269 | 
             
            #### LangCache Sentence Pairs (all)
         | 
| 270 |  | 
| 271 | 
             
            * Dataset: [LangCache Sentence Pairs (all)](https://huggingface.co/datasets/redis/langcache-sentencepairs-v1)
         | 
| 272 | 
            -
            * Size:  | 
| 273 | 
             
            * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
         | 
| 274 | 
             
            * Approximate statistics based on the first 1000 samples:
         | 
| 275 | 
            -
              |         | sentence1                                                                         | sentence2 | 
| 276 | 
            -
               | 
| 277 | 
            -
              | type    | string                                                                            | string | 
| 278 | 
            -
              | details | <ul><li>min:  | 
| 279 | 
             
            * Samples:
         | 
| 280 | 
            -
              | sentence1 | 
| 281 | 
            -
               | 
| 282 | 
            -
              | <code> | 
| 283 | 
            -
              | <code> | 
| 284 | 
            -
              | <code> | 
| 285 | 
             
            * Loss: [<code>CoSENTLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosentloss) with these parameters:
         | 
| 286 | 
             
              ```json
         | 
| 287 | 
             
              {
         | 
| @@ -295,19 +299,19 @@ You can finetune this model on your own dataset. | |
| 295 | 
             
            #### LangCache Sentence Pairs (all)
         | 
| 296 |  | 
| 297 | 
             
            * Dataset: [LangCache Sentence Pairs (all)](https://huggingface.co/datasets/redis/langcache-sentencepairs-v1)
         | 
| 298 | 
            -
            * Size:  | 
| 299 | 
             
            * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
         | 
| 300 | 
             
            * Approximate statistics based on the first 1000 samples:
         | 
| 301 | 
            -
              |         | sentence1                                                                         | sentence2 | 
| 302 | 
            -
               | 
| 303 | 
            -
              | type    | string                                                                            | string | 
| 304 | 
            -
              | details | <ul><li>min:  | 
| 305 | 
             
            * Samples:
         | 
| 306 | 
            -
              | sentence1 | 
| 307 | 
            -
               | 
| 308 | 
            -
              | <code> | 
| 309 | 
            -
              | <code> | 
| 310 | 
            -
              | <code> | 
| 311 | 
             
            * Loss: [<code>CoSENTLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosentloss) with these parameters:
         | 
| 312 | 
             
              ```json
         | 
| 313 | 
             
              {
         | 
| @@ -319,7 +323,7 @@ You can finetune this model on your own dataset. | |
| 319 | 
             
            ### Training Logs
         | 
| 320 | 
             
            | Epoch | Step | val_cosine_ap | test_cosine_ap |
         | 
| 321 | 
             
            |:-----:|:----:|:-------------:|:--------------:|
         | 
| 322 | 
            -
            | -1    | -1   | 0. | 
| 323 |  | 
| 324 |  | 
| 325 | 
             
            ### Framework Versions
         | 
|  | |
| 16 | 
             
            - loss:CoSENTLoss
         | 
| 17 | 
             
            base_model: Alibaba-NLP/gte-modernbert-base
         | 
| 18 | 
             
            widget:
         | 
| 19 | 
            +
            - source_sentence: That is evident from their failure , three times in a row , to
         | 
| 20 | 
            +
                get a big enough turnout to elect a president .
         | 
| 21 | 
             
              sentences:
         | 
| 22 | 
            +
              - 'given a text, decide to which of a predefined set of classes it belongs.  examples:
         | 
| 23 | 
            +
                language identification, genre classification, sentiment analysis, and spam detection'
         | 
| 24 | 
            +
              - Three times in a row , they failed to get a big _ enough turnout to elect a president
         | 
| 25 | 
            +
                .
         | 
| 26 | 
            +
              - He said the Government still did not know the real reason the original Saudi buyer
         | 
| 27 | 
            +
                pulled out on August 21 .
         | 
| 28 | 
            +
            - source_sentence: these use built-in and learned knowledge to make decisions and
         | 
| 29 | 
            +
                accomplish tasks that fulfill the intentions of the user.
         | 
| 30 | 
             
              sentences:
         | 
| 31 | 
            +
              - It also features a 4.5 in back-lit LCD screen and memory expansion facilities
         | 
| 32 | 
            +
                .
         | 
| 33 | 
            +
              - '- set of interrelated components - collect, process, store and distribute info.
         | 
| 34 | 
            +
                - support decision-making, coordination, and control'
         | 
| 35 | 
            +
              - software programs that work without direct human intervention to carry out specific
         | 
| 36 | 
            +
                tasks for an individual user, business process, or software application -siri
         | 
| 37 | 
            +
                adapts to your preferences over time
         | 
| 38 | 
            +
            - source_sentence: any location in storage can be accessed at any moment in approximately
         | 
| 39 | 
            +
                the same amount of time.
         | 
| 40 | 
             
              sentences:
         | 
| 41 | 
            +
              - your study can adopt the original model used by the cited theorist but you can
         | 
| 42 | 
            +
                modify different variables depending on your study of the whole theory
         | 
| 43 | 
            +
              - an access method that can access any storage location directly and in any order;
         | 
| 44 | 
            +
                primary storage devices and disk storage devices use random access...
         | 
| 45 | 
            +
              - Branson said that his preference would be to operate a fully commercial service
         | 
| 46 | 
            +
                on routes to New York , Barbados and Dubai .
         | 
| 47 | 
            +
            - source_sentence: United issued a statement saying it will " work professionally
         | 
| 48 | 
            +
                and cooperatively with all its unions . "
         | 
| 49 | 
             
              sentences:
         | 
| 50 | 
            +
              - network that acts like the human brain; type of ai
         | 
| 51 | 
            +
              - a database system consists of one or more databases and a database management
         | 
| 52 | 
            +
                system (dbms).
         | 
| 53 | 
            +
              - Senior vice president Sara Fields said the airline " will work professionally
         | 
| 54 | 
            +
                and cooperatively with all our unions . "
         | 
| 55 | 
            +
            - source_sentence: A European Union spokesman said the Commission was consulting EU
         | 
| 56 | 
            +
                member states " with a view to taking appropriate action if necessary " on the
         | 
| 57 | 
            +
                matter .
         | 
| 58 | 
             
              sentences:
         | 
| 59 | 
            +
              - Justice Minister Martin Cauchon and Prime Minister Jean Chretien both have said
         | 
| 60 | 
            +
                the government will introduce legislation to decriminalize possession of small
         | 
| 61 | 
            +
                amounts of pot .
         | 
| 62 | 
            +
              - Laos 's second most important export destination - said it was consulting EU member
         | 
| 63 | 
            +
                states ' ' with a view to taking appropriate action if necessary ' ' on the matter
         | 
| 64 | 
            +
                .
         | 
| 65 | 
            +
              - the form data assumes and the possible range of values that the attribute defined
         | 
| 66 | 
            +
                as that type of data may express  1. text 2. numerical
         | 
| 67 | 
             
            datasets:
         | 
| 68 | 
             
            - redis/langcache-sentencepairs-v1
         | 
| 69 | 
             
            pipeline_tag: sentence-similarity
         | 
|  | |
| 88 | 
             
                  type: val
         | 
| 89 | 
             
                metrics:
         | 
| 90 | 
             
                - type: cosine_accuracy
         | 
| 91 | 
            +
                  value: 0.7638310529446758
         | 
| 92 | 
             
                  name: Cosine Accuracy
         | 
| 93 | 
             
                - type: cosine_accuracy_threshold
         | 
| 94 | 
            +
                  value: 0.8640533685684204
         | 
| 95 | 
             
                  name: Cosine Accuracy Threshold
         | 
| 96 | 
             
                - type: cosine_f1
         | 
| 97 | 
            +
                  value: 0.6912742186395134
         | 
| 98 | 
             
                  name: Cosine F1
         | 
| 99 | 
             
                - type: cosine_f1_threshold
         | 
| 100 | 
            +
                  value: 0.825770378112793
         | 
| 101 | 
             
                  name: Cosine F1 Threshold
         | 
| 102 | 
             
                - type: cosine_precision
         | 
| 103 | 
            +
                  value: 0.6289243437982501
         | 
| 104 | 
             
                  name: Cosine Precision
         | 
| 105 | 
             
                - type: cosine_recall
         | 
| 106 | 
            +
                  value: 0.7673469387755102
         | 
| 107 | 
             
                  name: Cosine Recall
         | 
| 108 | 
             
                - type: cosine_ap
         | 
| 109 | 
            +
                  value: 0.7353968345121902
         | 
| 110 | 
             
                  name: Cosine Ap
         | 
| 111 | 
             
                - type: cosine_mcc
         | 
| 112 | 
            +
                  value: 0.4778469995044085
         | 
| 113 | 
             
                  name: Cosine Mcc
         | 
| 114 | 
             
              - task:
         | 
| 115 | 
             
                  type: binary-classification
         | 
|  | |
| 119 | 
             
                  type: test
         | 
| 120 | 
             
                metrics:
         | 
| 121 | 
             
                - type: cosine_accuracy
         | 
| 122 | 
            +
                  value: 0.7037777526966672
         | 
| 123 | 
             
                  name: Cosine Accuracy
         | 
| 124 | 
             
                - type: cosine_accuracy_threshold
         | 
| 125 | 
            +
                  value: 0.8524033427238464
         | 
| 126 | 
             
                  name: Cosine Accuracy Threshold
         | 
| 127 | 
             
                - type: cosine_f1
         | 
| 128 | 
            +
                  value: 0.7122170715871171
         | 
| 129 | 
             
                  name: Cosine F1
         | 
| 130 | 
             
                - type: cosine_f1_threshold
         | 
| 131 | 
            +
                  value: 0.8118724822998047
         | 
| 132 | 
             
                  name: Cosine F1 Threshold
         | 
| 133 | 
             
                - type: cosine_precision
         | 
| 134 | 
            +
                  value: 0.5989283084033827
         | 
| 135 | 
             
                  name: Cosine Precision
         | 
| 136 | 
             
                - type: cosine_recall
         | 
| 137 | 
            +
                  value: 0.8783612662942272
         | 
| 138 | 
             
                  name: Cosine Recall
         | 
| 139 | 
             
                - type: cosine_ap
         | 
| 140 | 
            +
                  value: 0.6476665223951498
         | 
| 141 | 
             
                  name: Cosine Ap
         | 
| 142 | 
             
                - type: cosine_mcc
         | 
| 143 | 
            +
                  value: 0.44182914870985407
         | 
| 144 | 
             
                  name: Cosine Mcc
         | 
| 145 | 
             
            ---
         | 
| 146 |  | 
|  | |
| 194 | 
             
            model = SentenceTransformer("redis/langcache-embed-v3")
         | 
| 195 | 
             
            # Run inference
         | 
| 196 | 
             
            sentences = [
         | 
| 197 | 
            +
                'A European Union spokesman said the Commission was consulting EU member states " with a view to taking appropriate action if necessary " on the matter .',
         | 
| 198 | 
            +
                "Laos 's second most important export destination - said it was consulting EU member states ' ' with a view to taking appropriate action if necessary ' ' on the matter .",
         | 
| 199 | 
            +
                'the form data assumes and the possible range of values that the attribute defined as that type of data may express  1. text 2. numerical',
         | 
| 200 | 
             
            ]
         | 
| 201 | 
             
            embeddings = model.encode(sentences)
         | 
| 202 | 
             
            print(embeddings.shape)
         | 
|  | |
| 205 | 
             
            # Get the similarity scores for the embeddings
         | 
| 206 | 
             
            similarities = model.similarity(embeddings, embeddings)
         | 
| 207 | 
             
            print(similarities)
         | 
| 208 | 
            +
            # tensor([[1.0078, 0.8789, 0.4961],
         | 
| 209 | 
            +
            #         [0.8789, 1.0000, 0.4648],
         | 
| 210 | 
            +
            #         [0.4961, 0.4648, 1.0078]], dtype=torch.bfloat16)
         | 
| 211 | 
             
            ```
         | 
| 212 |  | 
| 213 | 
             
            <!--
         | 
|  | |
| 245 |  | 
| 246 | 
             
            | Metric                    | val        | test       |
         | 
| 247 | 
             
            |:--------------------------|:-----------|:-----------|
         | 
| 248 | 
            +
            | cosine_accuracy           | 0.7638     | 0.7038     |
         | 
| 249 | 
            +
            | cosine_accuracy_threshold | 0.8641     | 0.8524     |
         | 
| 250 | 
            +
            | cosine_f1                 | 0.6913     | 0.7122     |
         | 
| 251 | 
            +
            | cosine_f1_threshold       | 0.8258     | 0.8119     |
         | 
| 252 | 
            +
            | cosine_precision          | 0.6289     | 0.5989     |
         | 
| 253 | 
            +
            | cosine_recall             | 0.7673     | 0.8784     |
         | 
| 254 | 
            +
            | **cosine_ap**             | **0.7354** | **0.6477** |
         | 
| 255 | 
            +
            | cosine_mcc                | 0.4778     | 0.4418     |
         | 
| 256 |  | 
| 257 | 
             
            <!--
         | 
| 258 | 
             
            ## Bias, Risks and Limitations
         | 
|  | |
| 273 | 
             
            #### LangCache Sentence Pairs (all)
         | 
| 274 |  | 
| 275 | 
             
            * Dataset: [LangCache Sentence Pairs (all)](https://huggingface.co/datasets/redis/langcache-sentencepairs-v1)
         | 
| 276 | 
            +
            * Size: 8,405 training samples
         | 
| 277 | 
             
            * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
         | 
| 278 | 
             
            * Approximate statistics based on the first 1000 samples:
         | 
| 279 | 
            +
              |         | sentence1                                                                         | sentence2                                                                        | label                                           |
         | 
| 280 | 
            +
              |:--------|:----------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|:------------------------------------------------|
         | 
| 281 | 
            +
              | type    | string                                                                            | string                                                                           | int                                             |
         | 
| 282 | 
            +
              | details | <ul><li>min: 6 tokens</li><li>mean: 24.89 tokens</li><li>max: 50 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 24.3 tokens</li><li>max: 43 tokens</li></ul> | <ul><li>0: ~45.80%</li><li>1: ~54.20%</li></ul> |
         | 
| 283 | 
             
            * Samples:
         | 
| 284 | 
            +
              | sentence1                                                                                                                             | sentence2                                                                                                                                          | label          |
         | 
| 285 | 
            +
              |:--------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------|:---------------|
         | 
| 286 | 
            +
              | <code>He said the foodservice pie business doesn 't fit the company 's long-term growth strategy .</code>                             | <code>" The foodservice pie business does not fit our long-term growth strategy .</code>                                                           | <code>1</code> |
         | 
| 287 | 
            +
              | <code>Magnarelli said Racicot hated the Iraqi regime and looked forward to using his long years of training in the war .</code>       | <code>His wife said he was " 100 percent behind George Bush " and looked forward to using his years of training in the war .</code>                | <code>0</code> |
         | 
| 288 | 
            +
              | <code>The dollar was at 116.92 yen against the yen , flat on the session , and at 1.2891 against the Swiss franc , also flat .</code> | <code>The dollar was at 116.78 yen JPY = , virtually flat on the session , and at 1.2871 against the Swiss franc CHF = , down 0.1 percent .</code> | <code>0</code> |
         | 
| 289 | 
             
            * Loss: [<code>CoSENTLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosentloss) with these parameters:
         | 
| 290 | 
             
              ```json
         | 
| 291 | 
             
              {
         | 
|  | |
| 299 | 
             
            #### LangCache Sentence Pairs (all)
         | 
| 300 |  | 
| 301 | 
             
            * Dataset: [LangCache Sentence Pairs (all)](https://huggingface.co/datasets/redis/langcache-sentencepairs-v1)
         | 
| 302 | 
            +
            * Size: 8,405 evaluation samples
         | 
| 303 | 
             
            * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
         | 
| 304 | 
             
            * Approximate statistics based on the first 1000 samples:
         | 
| 305 | 
            +
              |         | sentence1                                                                         | sentence2                                                                        | label                                           |
         | 
| 306 | 
            +
              |:--------|:----------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|:------------------------------------------------|
         | 
| 307 | 
            +
              | type    | string                                                                            | string                                                                           | int                                             |
         | 
| 308 | 
            +
              | details | <ul><li>min: 6 tokens</li><li>mean: 24.89 tokens</li><li>max: 50 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 24.3 tokens</li><li>max: 43 tokens</li></ul> | <ul><li>0: ~45.80%</li><li>1: ~54.20%</li></ul> |
         | 
| 309 | 
             
            * Samples:
         | 
| 310 | 
            +
              | sentence1                                                                                                                             | sentence2                                                                                                                                          | label          |
         | 
| 311 | 
            +
              |:--------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------|:---------------|
         | 
| 312 | 
            +
              | <code>He said the foodservice pie business doesn 't fit the company 's long-term growth strategy .</code>                             | <code>" The foodservice pie business does not fit our long-term growth strategy .</code>                                                           | <code>1</code> |
         | 
| 313 | 
            +
              | <code>Magnarelli said Racicot hated the Iraqi regime and looked forward to using his long years of training in the war .</code>       | <code>His wife said he was " 100 percent behind George Bush " and looked forward to using his years of training in the war .</code>                | <code>0</code> |
         | 
| 314 | 
            +
              | <code>The dollar was at 116.92 yen against the yen , flat on the session , and at 1.2891 against the Swiss franc , also flat .</code> | <code>The dollar was at 116.78 yen JPY = , virtually flat on the session , and at 1.2871 against the Swiss franc CHF = , down 0.1 percent .</code> | <code>0</code> |
         | 
| 315 | 
             
            * Loss: [<code>CoSENTLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosentloss) with these parameters:
         | 
| 316 | 
             
              ```json
         | 
| 317 | 
             
              {
         | 
|  | |
| 323 | 
             
            ### Training Logs
         | 
| 324 | 
             
            | Epoch | Step | val_cosine_ap | test_cosine_ap |
         | 
| 325 | 
             
            |:-----:|:----:|:-------------:|:--------------:|
         | 
| 326 | 
            +
            | -1    | -1   | 0.7354        | 0.6477         |
         | 
| 327 |  | 
| 328 |  | 
| 329 | 
             
            ### Framework Versions
         | 
    	
        config.json
    CHANGED
    
    | @@ -12,7 +12,7 @@ | |
| 12 | 
             
              "cls_token_id": 50281,
         | 
| 13 | 
             
              "decoder_bias": true,
         | 
| 14 | 
             
              "deterministic_flash_attn": false,
         | 
| 15 | 
            -
              "dtype": " | 
| 16 | 
             
              "embedding_dropout": 0.0,
         | 
| 17 | 
             
              "eos_token_id": 50282,
         | 
| 18 | 
             
              "global_attn_every_n_layers": 3,
         | 
|  | |
| 12 | 
             
              "cls_token_id": 50281,
         | 
| 13 | 
             
              "decoder_bias": true,
         | 
| 14 | 
             
              "deterministic_flash_attn": false,
         | 
| 15 | 
            +
              "dtype": "bfloat16",
         | 
| 16 | 
             
              "embedding_dropout": 0.0,
         | 
| 17 | 
             
              "eos_token_id": 50282,
         | 
| 18 | 
             
              "global_attn_every_n_layers": 3,
         | 
    	
        model.safetensors
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
            -
            size  | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:95d02211c4cca89113f9f3e93ed91f5176bf50170faa2cb835f7bfea15bb9dd2
         | 
| 3 | 
            +
            size 298041696
         | 

