rjurney committed on
Commit a31f1fd · unverified · 1 Parent(s): a32b389

Disabled a lot of training optimizations I had introduced in this run: eridu train --use-gpu --batch-size 1000 --epochs 8 --patience 1 --resampling --weight-decay 0.01 --random-seed 31337 --warmup-ratio 0.1 --learning-rate 3e-5 --save-strategy steps --eval-strategy steps --sample-fraction 0.1

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. README.md +163 -107
  2. checkpoint-1000/1_Pooling/config.json +10 -0
  3. checkpoint-1000/README.md +466 -0
  4. checkpoint-1000/config.json +25 -0
  5. checkpoint-1000/config_sentence_transformers.json +10 -0
  6. checkpoint-1000/model.safetensors +3 -0
  7. checkpoint-1000/modules.json +14 -0
  8. checkpoint-1000/optimizer.pt +3 -0
  9. checkpoint-1000/rng_state.pth +3 -0
  10. checkpoint-1000/scheduler.pt +3 -0
  11. checkpoint-1000/sentence_bert_config.json +4 -0
  12. checkpoint-1000/special_tokens_map.json +51 -0
  13. checkpoint-1000/tokenizer.json +3 -0
  14. checkpoint-1000/tokenizer_config.json +65 -0
  15. checkpoint-1000/trainer_state.json +217 -0
  16. checkpoint-1000/training_args.bin +3 -0
  17. checkpoint-1000/unigram.json +3 -0
  18. checkpoint-1100/1_Pooling/config.json +10 -0
  19. checkpoint-1100/README.md +467 -0
  20. checkpoint-1100/config.json +25 -0
  21. checkpoint-1100/config_sentence_transformers.json +10 -0
  22. checkpoint-1100/model.safetensors +3 -0
  23. checkpoint-1100/modules.json +14 -0
  24. checkpoint-1100/optimizer.pt +3 -0
  25. checkpoint-1100/rng_state.pth +3 -0
  26. checkpoint-1100/scheduler.pt +3 -0
  27. checkpoint-1100/sentence_bert_config.json +4 -0
  28. checkpoint-1100/special_tokens_map.json +51 -0
  29. checkpoint-1100/tokenizer.json +3 -0
  30. checkpoint-1100/tokenizer_config.json +65 -0
  31. checkpoint-1100/trainer_state.json +233 -0
  32. checkpoint-1100/training_args.bin +3 -0
  33. checkpoint-1100/unigram.json +3 -0
  34. checkpoint-1200/1_Pooling/config.json +10 -0
  35. checkpoint-1200/README.md +468 -0
  36. checkpoint-1200/config.json +25 -0
  37. checkpoint-1200/config_sentence_transformers.json +10 -0
  38. checkpoint-1200/model.safetensors +3 -0
  39. checkpoint-1200/modules.json +14 -0
  40. checkpoint-1200/optimizer.pt +3 -0
  41. checkpoint-1200/rng_state.pth +3 -0
  42. checkpoint-1200/scheduler.pt +3 -0
  43. checkpoint-1200/sentence_bert_config.json +4 -0
  44. checkpoint-1200/special_tokens_map.json +51 -0
  45. checkpoint-1200/tokenizer.json +3 -0
  46. checkpoint-1200/tokenizer_config.json +65 -0
  47. checkpoint-1200/trainer_state.json +249 -0
  48. checkpoint-1200/training_args.bin +3 -0
  49. checkpoint-1200/unigram.json +3 -0
  50. checkpoint-1300/1_Pooling/config.json +10 -0
README.md CHANGED
@@ -7,34 +7,35 @@ tags:
7
  - sentence-similarity
8
  - feature-extraction
9
  - generated_from_trainer
10
- - dataset_size:2130620
11
  - loss:ContrastiveLoss
12
  base_model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
13
  widget:
14
- - source_sentence: Russell Jurney
15
  sentences:
16
- - Russell H. Jurney
17
- - Russ Jurney
18
- - Русс Джерни
19
- - source_sentence: Ben Lorica
20
  sentences:
21
- - Benjamin Lorica
22
- - 罗瑞卡
23
- - 罗睿姬
24
- - source_sentence: Yevgeny Prigozhin
25
  sentences:
26
- - Евге́ний Ви́кторович Приго́жин
27
- - Y. Prighozhin
28
- - source_sentence: M.R. James
 
29
  sentences:
30
- - Montague Rhodes James
31
- - J.R. James
32
- - Mr. James
33
- - source_sentence: Muhammad Ali
34
  sentences:
35
- - مُحَمَّد عَلِيّ
36
- - Mohammed Ali
37
- - Sonny Liston
38
  pipeline_tag: sentence-similarity
39
  library_name: sentence-transformers
40
  metrics:
@@ -57,38 +58,81 @@ model-index:
57
  type: sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2
58
  metrics:
59
  - type: cosine_accuracy
60
- value: 0.9905380542935456
61
  name: Cosine Accuracy
62
  - type: cosine_accuracy_threshold
63
- value: 0.6790644526481628
64
  name: Cosine Accuracy Threshold
65
  - type: cosine_f1
66
- value: 0.9856131536880567
67
  name: Cosine F1
68
  - type: cosine_f1_threshold
69
- value: 0.6790644526481628
70
  name: Cosine F1 Threshold
71
  - type: cosine_precision
72
- value: 0.9816899806664392
73
  name: Cosine Precision
74
  - type: cosine_recall
75
- value: 0.9895678092399404
76
  name: Cosine Recall
77
  - type: cosine_ap
78
- value: 0.9977983578816215
79
  name: Cosine Ap
80
  - type: cosine_mcc
81
- value: 0.9785817179348335
82
  name: Cosine Mcc
83
  ---
84
 
85
  # Graphlet-AI/eridu
86
 
87
  This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) for person and company name matching using the [Open Sanctions matcher training data](https://www.opensanctions.org/docs/pairs/). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used as part of a deep, fuzzy entity resolution process.
88
 
89
  ## Model Details
90
91
  ### Model Description
 
92
  - **Model Type:** Sentence Transformer
93
  - **Base model:** [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) <!-- at revision 86741b4e3f5cb7765a600d3a3d55a0f6a6cb443d -->
94
  - **Maximum Sequence Length:** 128 tokens
@@ -103,6 +147,7 @@ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [s
103
  - **Documentation:** [Graphlet-AI/eridu Documentation](https://github.com/Graphlet-AI/eridu)
104
  - **Repository:** [Graphlet-AI/eridu on GitHub](https://github.com/Graphlet-AI/eridu)
105
  - **Hugging Face:** [Graphlet-AI/eridu on Hugging Face](https://huggingface.co/Graphlet-AI/eridu)
 
106
 
107
  ### Full Model Architecture
108
 
@@ -124,19 +169,20 @@ pip install -U sentence-transformers
124
  ```
125
 
126
  Then you can load this model and run inference.
 
127
  ```python
128
  from sentence_transformers import SentenceTransformer
129
 
 
130
  # Download from the 🤗 Hub
131
  model = SentenceTransformer("Graphlet-AI/eridu")
132
-
133
- names = [
134
- "Russell Jurney",
135
- "Russ Jurney",
136
- "Русс Джерни",
137
  ]
138
-
139
- embeddings = model.encode(names)
140
  print(embeddings.shape)
141
  # [3, 384]
142
 
@@ -144,11 +190,6 @@ print(embeddings.shape)
144
  similarities = model.similarity(embeddings, embeddings)
145
  print(similarities.shape)
146
  # [3, 3]
147
-
148
- print(similarities.numpy())
149
- # [[0.9999999 0.99406826 0.99406105]
150
- # [0.9940683 1. 0.9969202 ]
151
- # [0.99406105 0.9969202 1. ]]
152
  ```
153
 
154
  <!--
@@ -162,7 +203,7 @@ print(similarities.numpy())
162
  <!--
163
  ### Downstream Usage (Sentence Transformers)
164
 
165
- You can fine-tune this model on your own dataset.
166
 
167
  <details><summary>Click to expand</summary>
168
 
@@ -181,19 +222,19 @@ You can fine-tune this model on your own dataset.
181
 
182
  #### Binary Classification
183
 
184
- * Dataset: `sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2`
185
- * Evaluated with [<code>BinaryClassificationEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.BinaryClassificationEvaluator)
186
 
187
  | Metric | Value |
188
  |:--------------------------|:-----------|
189
- | cosine_accuracy | 0.9905 |
190
- | cosine_accuracy_threshold | 0.6791 |
191
- | cosine_f1 | 0.9856 |
192
- | cosine_f1_threshold | 0.6791 |
193
- | cosine_precision | 0.9817 |
194
- | cosine_recall | 0.9896 |
195
- | **cosine_ap** | **0.9978** |
196
- | cosine_mcc | 0.9786 |
197
 
198
  <!--
199
  ## Bias, Risks and Limitations
@@ -213,20 +254,25 @@ You can fine-tune this model on your own dataset.
213
 
214
  #### Unnamed Dataset
215
 
216
- * Size: 2,130,620 training samples
217
- * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
218
- * Approximate statistics based on the first 1000 samples:
 
219
  | | sentence1 | sentence2 | label |
220
  |:--------|:---------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|:---------------------------------------------------------------|
221
  | type | string | string | float |
222
- | details | <ul><li>min: 3 tokens</li><li>mean: 9.28 tokens</li><li>max: 57 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 9.11 tokens</li><li>max: 65 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.34</li><li>max: 1.0</li></ul> |
223
- * Samples:
224
- | sentence1 | sentence2 | label |
225
- |:----------------------------|:-------------------------------|:-----------------|
226
- | <code>ג'ק וייט</code> | <code>Jack White</code> | <code>1.0</code> |
227
- | <code>Абдуллоҳ Гул</code> | <code>Савицкая Светлана</code> | <code>0.0</code> |
228
- | <code>ショーン・ジャスティン・ペン</code> | <code>شان پن</code> | <code>1.0</code> |
229
- * Loss: [<code>ContrastiveLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#contrastiveloss) with these parameters:
230
  ```json
231
  {
232
  "distance_metric": "SiameseDistanceMetric.COSINE_DISTANCE",
@@ -239,20 +285,25 @@ You can fine-tune this model on your own dataset.
239
 
240
  #### Unnamed Dataset
241
 
242
- * Size: 266,328 evaluation samples
243
- * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
244
- * Approximate statistics based on the first 1000 samples:
245
- | | sentence1 | sentence2 | label |
246
- |:--------|:---------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|:---------------------------------------------------------------|
247
- | type | string | string | float |
248
- | details | <ul><li>min: 3 tokens</li><li>mean: 9.27 tokens</li><li>max: 79 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 8.99 tokens</li><li>max: 61 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.32</li><li>max: 1.0</li></ul> |
249
- * Samples:
250
- | sentence1 | sentence2 | label |
251
- |:---------------------------------------------|:-----------------------------------------------|:-----------------|
252
- | <code>Анатолий Николаевич Герасимов</code> | <code>Anatoli Nikolajewitsch Gerassimow</code> | <code>1.0</code> |
253
- | <code>Igor Stanislavovitsj Prokopenko</code> | <code>Angelo Lauricella</code> | <code>0.0</code> |
254
- | <code>Кофе, Линда</code> | <code>Святлана Яўгенаўна Савіцкая</code> | <code>0.0</code> |
255
- * Loss: [<code>ContrastiveLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#contrastiveloss) with these parameters:
256
  ```json
257
  {
258
  "distance_metric": "SiameseDistanceMetric.COSINE_DISTANCE",
@@ -262,41 +313,43 @@ You can fine-tune this model on your own dataset.
262
  ```
263
 
264
  ### Training Hyperparameters
 
265
  #### Non-Default Hyperparameters
266
 
267
  - `eval_strategy`: steps
268
- - `per_device_train_batch_size`: 5000
269
- - `per_device_eval_batch_size`: 5000
270
  - `gradient_accumulation_steps`: 4
271
- - `weight_decay`: 0.02
272
- - `num_train_epochs`: 10
 
273
  - `warmup_ratio`: 0.1
274
- - `fp16`: True
275
  - `load_best_model_at_end`: True
276
  - `optim`: adafactor
277
- - `gradient_checkpointing`: True
278
 
279
  #### All Hyperparameters
 
280
  <details><summary>Click to expand</summary>
281
 
282
  - `overwrite_output_dir`: False
283
  - `do_predict`: False
284
  - `eval_strategy`: steps
285
  - `prediction_loss_only`: True
286
- - `per_device_train_batch_size`: 5000
287
- - `per_device_eval_batch_size`: 5000
288
  - `per_gpu_train_batch_size`: None
289
  - `per_gpu_eval_batch_size`: None
290
  - `gradient_accumulation_steps`: 4
291
  - `eval_accumulation_steps`: None
292
  - `torch_empty_cache_steps`: None
293
- - `learning_rate`: 5e-05
294
- - `weight_decay`: 0.02
295
  - `adam_beta1`: 0.9
296
  - `adam_beta2`: 0.999
297
  - `adam_epsilon`: 1e-08
298
  - `max_grad_norm`: 1.0
299
- - `num_train_epochs`: 10
300
  - `max_steps`: -1
301
  - `lr_scheduler_type`: linear
302
  - `lr_scheduler_kwargs`: {}
@@ -318,8 +371,8 @@ You can fine-tune this model on your own dataset.
318
  - `jit_mode_eval`: False
319
  - `use_ipex`: False
320
  - `bf16`: False
321
- - `fp16`: True
322
- - `fp16_opt_level`: O1
323
  - `half_precision_backend`: auto
324
  - `bf16_full_eval`: False
325
  - `fp16_full_eval`: False
@@ -364,7 +417,7 @@ You can fine-tune this model on your own dataset.
364
  - `hub_strategy`: every_save
365
  - `hub_private_repo`: None
366
  - `hub_always_push`: False
367
- - `gradient_checkpointing`: True
368
  - `gradient_checkpointing_kwargs`: None
369
  - `include_inputs_for_metrics`: False
370
  - `include_for_metrics`: []
@@ -372,7 +425,7 @@ You can fine-tune this model on your own dataset.
372
  - `fp16_backend`: auto
373
  - `push_to_hub_model_id`: None
374
  - `push_to_hub_organization`: None
375
- - `mp_parameters`:
376
  - `auto_find_batch_size`: False
377
  - `full_determinism`: False
378
  - `torchdynamo`: None
@@ -397,24 +450,25 @@ You can fine-tune this model on your own dataset.
397
  </details>
398
 
399
  ### Training Logs
400
- | Epoch | Step | Training Loss | Validation Loss | sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap |
401
- |:----------:|:--------:|:-------------:|:---------------:|:---------------------------------------------------------------------:|
402
- | -1 | -1 | - | - | 0.7195 |
403
- | 0.9368 | 100 | - | 0.0083 | 0.9597 |
404
- | 1.8712 | 200 | - | 0.0043 | 0.9877 |
405
- | 2.8056 | 300 | - | 0.0028 | 0.9936 |
406
- | 3.7400 | 400 | - | 0.0021 | 0.9954 |
407
- | 4.6745 | 500 | 0.0224 | 0.0016 | 0.9964 |
408
- | 5.6089 | 600 | - | 0.0015 | 0.9970 |
409
- | 6.5433 | 700 | - | 0.0014 | 0.9974 |
410
- | 7.4778 | 800 | - | 0.0013 | 0.9975 |
411
- | 8.4122 | 900 | - | 0.0013 | 0.9977 |
412
- | **9.3466** | **1000** | **0.0052** | **0.0012** | **0.9978** |
413
- | 9.9087 | 1060 | - | 0.0012 | 0.9978 |
414
-
415
- * The bold row denotes the saved checkpoint.
416
 
417
  ### Framework Versions
 
418
  - Python: 3.12.9
419
  - Sentence Transformers: 3.4.1
420
  - Transformers: 4.51.3
@@ -428,6 +482,7 @@ You can fine-tune this model on your own dataset.
428
  ### BibTeX
429
 
430
  #### Sentence Transformers
 
431
  ```bibtex
432
  @inproceedings{reimers-2019-sentence-bert,
433
  title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
@@ -441,6 +496,7 @@ You can fine-tune this model on your own dataset.
441
  ```
442
 
443
  #### ContrastiveLoss
 
444
  ```bibtex
445
  @inproceedings{hadsell2006dimensionality,
446
  author={Hadsell, R. and Chopra, S. and LeCun, Y.},
 
7
  - sentence-similarity
8
  - feature-extraction
9
  - generated_from_trainer
10
+ - dataset_size:2130621
11
  - loss:ContrastiveLoss
12
  base_model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
13
  widget:
14
+ - source_sentence: Kim Chol-sam
15
  sentences:
16
+ - Stankevich Sergey Nikolayevich
17
+ - Kim Chin-So’k
18
+ - Julen Lopetegui Agote
19
+ - source_sentence: دينا بنت عبد الحميد
20
  sentences:
21
+ - Alexia van Amsberg
22
+ - Anthony Nicholas Colin Maitland Biddulph, 5th Baron Biddulph
23
+ - Dina bint Abdul-Hamíd
24
+ - source_sentence: Մուհամեդ բեն Նաիֆ Ալ Սաուդ
25
  sentences:
26
+ - Karpov Anatoly Evgenyevich
27
+ - GNPower Mariveles Coal Plant [former]
28
+ - Muhammed bin Nayef bin Abdul Aziz Al Saud
29
+ - source_sentence: Edward Gnehm
30
  sentences:
31
+ - Шауэрте, Хартмут
32
+ - Ханзада Филипп, Эдинбург герцогі
33
+ - AFX
34
+ - source_sentence: Schori i Lidingö
35
  sentences:
36
+ - Yordan Canev
37
+ - ကားပေါ့ အန်နာတိုလီ
38
+ - BYSTROV, Mikhail Ivanovich
39
  pipeline_tag: sentence-similarity
40
  library_name: sentence-transformers
41
  metrics:
 
58
  type: sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2
59
  metrics:
60
  - type: cosine_accuracy
61
+ value: 0.9843050674356433
62
  name: Cosine Accuracy
63
  - type: cosine_accuracy_threshold
64
+ value: 0.742120623588562
65
  name: Cosine Accuracy Threshold
66
  - type: cosine_f1
67
+ value: 0.9760932477723254
68
  name: Cosine F1
69
  - type: cosine_f1_threshold
70
+ value: 0.742120623588562
71
  name: Cosine F1 Threshold
72
  - type: cosine_precision
73
+ value: 0.9703216856372878
74
  name: Cosine Precision
75
  - type: cosine_recall
76
+ value: 0.9819338803033267
77
  name: Cosine Recall
78
  - type: cosine_ap
79
+ value: 0.9955554741842152
80
  name: Cosine Ap
81
  - type: cosine_mcc
82
+ value: 0.964449493634366
83
  name: Cosine Mcc
84
  ---
85
 
86
  # Graphlet-AI/eridu
87
 
88
+ Deep fuzzy matching of people and company names for multilingual entity resolution using representation learning that incorporates a deep understanding of people and company names and works _much better_ than string distance methods.
89
+
90
  This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) for person and company name matching using the [Open Sanctions matcher training data](https://www.opensanctions.org/docs/pairs/). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used as part of a deep, fuzzy entity resolution process.
91
 
92
  ## Model Details
93
 
94
+ ### TLDR: 5 Lines of Code
95
+
96
+ ```python
97
+ from sentence_transformers import SentenceTransformer
98
+
99
+
100
+ # Download from the 🤗 Hub
101
+ model = SentenceTransformer("Graphlet-AI/eridu")
102
+
103
+ names = [
104
+ "Russell Jurney",
105
+ "Russ Jurney",
106
+ "Русс Джерни",
107
+ ]
108
+
109
+ embeddings = model.encode(names)
110
+ print(embeddings.shape)
111
+ # [3, 384]
112
+
113
+ # Get the similarity scores for the embeddings
114
+ similarities = model.similarity(embeddings, embeddings)
115
+ print(similarities.shape)
116
+ # [3, 3]
117
+
118
+ print(similarities.numpy())
119
+ # [[0.9999999 0.99406826 0.99406105]
120
+ # [0.9940683 1. 0.9969202 ]
121
+ # [0.99406105 0.9969202 1. ]]
122
+ ```
123
+
124
+ ### Project Eridu Overview
125
+
126
+ This project is a deep fuzzy matching system for person and company names for entity resolution using representation learning. It is designed to match people and company names across languages and character sets, using a pre-trained text embedding model from HuggingFace that we fine-tune using contrastive learning on 2 million labeled pairs of person and company names from the [Open Sanctions Matcher training data](https://www.opensanctions.org/docs/pairs/). The project includes a command-line interface (CLI) utility for training the model and comparing pairs of names using cosine similarity.
127
+
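For reference, this run was produced with the project's CLI; the commit message above records the exact invocation:

```bash
eridu train --use-gpu --batch-size 1000 --epochs 8 --patience 1 --resampling \
  --weight-decay 0.01 --random-seed 31337 --warmup-ratio 0.1 --learning-rate 3e-5 \
  --save-strategy steps --eval-strategy steps --sample-fraction 0.1
```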
128
+ Matching people and company names is an intractable problem for traditional parsing-based methods: there is too much variation across cultures and jurisdictions to solve it with hand-written rules. This results in complex, cost-prohibitive enterprise solutions for name matching like [IBM InfoSphere Global Name Management](https://www.ibm.com/products/ibm-infosphere-global-name-management). Machine learning is applied to problems of cultural variation like this one, where the effort of manually programming a solution approaches infinity, to learn the program automatically. Since 2008 there has been an explosion of deep learning methods that automate feature engineering via representation learning, such as text embeddings.
129
+
130
+ This project loads the pre-trained [paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) paraphrase model from HuggingFace and fine-tunes it for the name-matching task using contrastive learning on more than 2 million labeled pairs of matching and non-matching (just as important) person and company names from the [Open Sanctions Matcher training data](https://www.opensanctions.org/docs/pairs/), creating a deep fuzzy matching system for entity resolution.
131
+
132
+ This model is available on HuggingFace Hub as [Graphlet-AI/eridu](https://huggingface.co/Graphlet-AI/eridu) and can be used in any Python project via the [Sentence Transformers](https://sbert.net/) library in five lines of code. The model is designed for entity resolution tasks, such as matching people and company names across different languages and character sets when linking records.
133
+
134
  ### Model Description
135
+
136
  - **Model Type:** Sentence Transformer
137
  - **Base model:** [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) <!-- at revision 86741b4e3f5cb7765a600d3a3d55a0f6a6cb443d -->
138
  - **Maximum Sequence Length:** 128 tokens
 
147
  - **Documentation:** [Graphlet-AI/eridu Documentation](https://github.com/Graphlet-AI/eridu)
148
  - **Repository:** [Graphlet-AI/eridu on GitHub](https://github.com/Graphlet-AI/eridu)
149
  - **Hugging Face:** [Graphlet-AI/eridu on Hugging Face](https://huggingface.co/Graphlet-AI/eridu)
150
+ - **PyPi Package:** [Graphlet-AI/eridu on PyPi](https://pypi.org/project/eridu/)
151
 
152
  ### Full Model Architecture
153
 
 
169
  ```
170
 
171
  Then you can load this model and run inference.
172
+
173
  ```python
174
  from sentence_transformers import SentenceTransformer
175
 
176
+
177
  # Download from the 🤗 Hub
178
  model = SentenceTransformer("Graphlet-AI/eridu")
179
+ # Run inference
180
+ sentences = [
181
+ 'Schori i Lidingö',
182
+ 'Yordan Canev',
183
+ 'ကားပေါ့ အန်နာတိုလီ',
184
  ]
185
+ embeddings = model.encode(sentences)
 
186
  print(embeddings.shape)
187
  # [3, 384]
188
 
 
190
  similarities = model.similarity(embeddings, embeddings)
191
  print(similarities.shape)
192
# [3, 3]
193
  ```
194
 
195
  <!--
 
203
  <!--
204
  ### Downstream Usage (Sentence Transformers)
205
 
206
+ You can finetune this model on your own dataset.
207
 
208
  <details><summary>Click to expand</summary>
209
 
 
222
 
223
  #### Binary Classification
224
 
225
+ - Dataset: `sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2`
226
+ - Evaluated with [<code>BinaryClassificationEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.BinaryClassificationEvaluator)
227
 
228
  | Metric | Value |
229
  |:--------------------------|:-----------|
230
+ | cosine_accuracy | 0.9843 |
231
+ | cosine_accuracy_threshold | 0.7421 |
232
+ | cosine_f1 | 0.9761 |
233
+ | cosine_f1_threshold | 0.7421 |
234
+ | cosine_precision | 0.9703 |
235
+ | cosine_recall | 0.9819 |
236
+ | **cosine_ap** | **0.9956** |
237
+ | cosine_mcc | 0.9644 |
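To turn these scores into match decisions, threshold the cosine similarity at the tuned `cosine_accuracy_threshold` above. A minimal sketch, assuming the published `Graphlet-AI/eridu` model and treating the threshold as a starting point rather than a guarantee:

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Graphlet-AI/eridu")
THRESHOLD = 0.7421  # cosine_accuracy_threshold from the table above


def is_match(name_a: str, name_b: str) -> bool:
    # Embed both names, then compare them with cosine similarity
    embeddings = model.encode([name_a, name_b])
    similarity = model.similarity(embeddings[0:1], embeddings[1:2]).item()
    return similarity >= THRESHOLD


print(is_match("Russell Jurney", "Русс Джерни"))  # expected: True
```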
238
 
239
  <!--
240
  ## Bias, Risks and Limitations
 
254
 
255
  #### Unnamed Dataset
256
 
257
+ - Size: 2,130,621 training samples
258
+ - Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
259
+ - Approximate statistics based on the first 1000 samples:
260
+
261
  | | sentence1 | sentence2 | label |
262
  |:--------|:---------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|:---------------------------------------------------------------|
263
  | type | string | string | float |
264
+ | details | <ul><li>min: 3 tokens</li><li>mean: 9.32 tokens</li><li>max: 57 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 9.16 tokens</li><li>max: 54 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.34</li><li>max: 1.0</li></ul> |
265
+
266
+ - Samples:
267
+
268
+ | sentence1 | sentence2 | label |
269
+ |:----------------------------------|:------------------------------------|:-----------------|
270
+ | <code>캐스린 설리번</code> | <code>Kathryn D. Sullivanová</code> | <code>1.0</code> |
271
+ | <code>ଶିବରାଜ ଅଧାଲରାଓ ପାଟିଲ</code> | <code>Aleksander Lubocki</code> | <code>0.0</code> |
272
+ | <code>Пырванов, Георги</code> | <code>アナトーリー・セルジュコフ</code> | <code>0.0</code> |
273
+
274
+ - Loss: [<code>ContrastiveLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#contrastiveloss) with these parameters:
275
+
276
  ```json
277
  {
278
  "distance_metric": "SiameseDistanceMetric.COSINE_DISTANCE",
 
285
 
286
  #### Unnamed Dataset
287
 
288
+ - Size: 2,663,276 evaluation samples
289
+ - Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
290
+ - Approximate statistics based on the first 1000 samples:
291
+
292
+ | | sentence1 | sentence2 | label |
293
+ |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:---------------------------------------------------------------|
294
+ | type | string | string | float |
295
+ | details | <ul><li>min: 3 tokens</li><li>mean: 9.34 tokens</li><li>max: 102 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 9.11 tokens</li><li>max: 100 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.33</li><li>max: 1.0</li></ul> |
296
+
297
+ - Samples:
298
+
299
+ | sentence1 | sentence2 | label |
300
+ |:--------------------------------------|:---------------------------------------|:-----------------|
301
+ | <code>Ева Херман</code> | <code>I Xuan Karlos</code> | <code>0.0</code> |
302
+ | <code>Кличков Андрій Євгенович</code> | <code>Андрэй Яўгенавіч Клычкоў</code> | <code>1.0</code> |
303
+ | <code>Кинах А.</code> | <code>Senator John Hickenlooper</code> | <code>0.0</code> |
304
+
305
+ - Loss: [<code>ContrastiveLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#contrastiveloss) with these parameters:
306
+
307
  ```json
308
  {
309
  "distance_metric": "SiameseDistanceMetric.COSINE_DISTANCE",
 
313
  ```
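Both the training and evaluation splits use the same loss configuration. A minimal sketch of constructing it with the Sentence Transformers API (`margin: 0.5` comes from the full loss config recorded in this commit's checkpoint READMEs):

```python
from sentence_transformers import SentenceTransformer, losses

model = SentenceTransformer(
    "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
)

# Mirrors the JSON parameters above
train_loss = losses.ContrastiveLoss(
    model=model,
    distance_metric=losses.SiameseDistanceMetric.COSINE_DISTANCE,
    margin=0.5,
    size_average=True,
)
```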
314
 
315
  ### Training Hyperparameters
316
+
317
  #### Non-Default Hyperparameters
318
 
319
  - `eval_strategy`: steps
320
+ - `per_device_train_batch_size`: 1000
321
+ - `per_device_eval_batch_size`: 1000
322
  - `gradient_accumulation_steps`: 4
323
+ - `learning_rate`: 3e-05
324
+ - `weight_decay`: 0.01
325
+ - `num_train_epochs`: 8
326
  - `warmup_ratio`: 0.1
327
+ - `fp16_opt_level`: O0
328
  - `load_best_model_at_end`: True
329
  - `optim`: adafactor
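As a sketch, these non-default values map onto the Sentence Transformers v3 trainer arguments as follows (the `output_dir` is the one recorded in this commit's `trainer_state.json`; all other values are listed above):

```python
from sentence_transformers import SentenceTransformerTrainingArguments

args = SentenceTransformerTrainingArguments(
    output_dir="data/fine-tuned-sbert-sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2-original-adafactor",
    eval_strategy="steps",
    per_device_train_batch_size=1000,
    per_device_eval_batch_size=1000,
    gradient_accumulation_steps=4,
    learning_rate=3e-5,
    weight_decay=0.01,
    num_train_epochs=8,
    warmup_ratio=0.1,
    load_best_model_at_end=True,
    optim="adafactor",
)
```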
 
330
 
331
  #### All Hyperparameters
332
+
333
  <details><summary>Click to expand</summary>
334
 
335
  - `overwrite_output_dir`: False
336
  - `do_predict`: False
337
  - `eval_strategy`: steps
338
  - `prediction_loss_only`: True
339
+ - `per_device_train_batch_size`: 1000
340
+ - `per_device_eval_batch_size`: 1000
341
  - `per_gpu_train_batch_size`: None
342
  - `per_gpu_eval_batch_size`: None
343
  - `gradient_accumulation_steps`: 4
344
  - `eval_accumulation_steps`: None
345
  - `torch_empty_cache_steps`: None
346
+ - `learning_rate`: 3e-05
347
+ - `weight_decay`: 0.01
348
  - `adam_beta1`: 0.9
349
  - `adam_beta2`: 0.999
350
  - `adam_epsilon`: 1e-08
351
  - `max_grad_norm`: 1.0
352
+ - `num_train_epochs`: 8
353
  - `max_steps`: -1
354
  - `lr_scheduler_type`: linear
355
  - `lr_scheduler_kwargs`: {}
 
371
  - `jit_mode_eval`: False
372
  - `use_ipex`: False
373
  - `bf16`: False
374
+ - `fp16`: False
375
+ - `fp16_opt_level`: O0
376
  - `half_precision_backend`: auto
377
  - `bf16_full_eval`: False
378
  - `fp16_full_eval`: False
 
417
  - `hub_strategy`: every_save
418
  - `hub_private_repo`: None
419
  - `hub_always_push`: False
420
+ - `gradient_checkpointing`: False
421
  - `gradient_checkpointing_kwargs`: None
422
  - `include_inputs_for_metrics`: False
423
  - `include_for_metrics`: []
 
425
  - `fp16_backend`: auto
426
  - `push_to_hub_model_id`: None
427
  - `push_to_hub_organization`: None
428
+ - `mp_parameters`:
429
  - `auto_find_batch_size`: False
430
  - `full_determinism`: False
431
  - `torchdynamo`: None
 
450
  </details>
451
 
452
  ### Training Logs
453
+
454
+ | Epoch | Step | Training Loss | Validation Loss | sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap |
455
+ |:------:|:----:|:-------------:|:---------------:|:---------------------------------------------------------------------:|
456
+ | -1 | -1 | - | - | 0.7140 |
457
+ | 0.1877 | 100 | - | 0.0125 | 0.8849 |
458
+ | 0.3754 | 200 | - | 0.0090 | 0.9369 |
459
+ | 0.5631 | 300 | - | 0.0068 | 0.9630 |
460
+ | 0.7508 | 400 | - | 0.0052 | 0.9774 |
461
+ | 0.9385 | 500 | 0.0409 | 0.0040 | 0.9845 |
462
+ | 1.1276 | 600 | - | 0.0033 | 0.9887 |
463
+ | 1.3153 | 700 | - | 0.0028 | 0.9911 |
464
+ | 1.5031 | 800 | - | 0.0026 | 0.9927 |
465
+ | 1.6908 | 900 | - | 0.0022 | 0.9938 |
466
+ | 1.8785 | 1000 | 0.0131 | 0.0022 | 0.9944 |
467
+ | 2.0676 | 1100 | - | 0.0019 | 0.9950 |
468
+ | 2.2553 | 1200 | - | 0.0017 | 0.9956 |
469
 
470
  ### Framework Versions
471
+
472
  - Python: 3.12.9
473
  - Sentence Transformers: 3.4.1
474
  - Transformers: 4.51.3
 
482
  ### BibTeX
483
 
484
  #### Sentence Transformers
485
+
486
  ```bibtex
487
  @inproceedings{reimers-2019-sentence-bert,
488
  title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
 
496
  ```
497
 
498
  #### ContrastiveLoss
499
+
500
  ```bibtex
501
  @inproceedings{hadsell2006dimensionality,
502
  author={Hadsell, R. and Chopra, S. and LeCun, Y.},
checkpoint-1000/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
1
+ {
2
+ "word_embedding_dimension": 384,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
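This config enables mean pooling over token embeddings. For clarity, a minimal re-implementation of the masked mean it computes (a sketch, not the Pooling module's exact code):

```python
import torch


def mean_pool(token_embeddings: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    """Average the token embeddings, ignoring padding positions."""
    # token_embeddings: (batch, seq_len, 384); attention_mask: (batch, seq_len)
    mask = attention_mask.unsqueeze(-1).float()
    summed = (token_embeddings * mask).sum(dim=1)
    counts = mask.sum(dim=1).clamp(min=1e-9)
    return summed / counts
```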
checkpoint-1000/README.md ADDED
@@ -0,0 +1,466 @@
1
+ ---
2
+ language:
3
+ - en
4
+ license: apache-2.0
5
+ tags:
6
+ - sentence-transformers
7
+ - sentence-similarity
8
+ - feature-extraction
9
+ - generated_from_trainer
10
+ - dataset_size:2130621
11
+ - loss:ContrastiveLoss
12
+ base_model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
13
+ widget:
14
+ - source_sentence: Kim Chol-sam
15
+ sentences:
16
+ - Stankevich Sergey Nikolayevich
17
+ - Kim Chin-So’k
18
+ - Julen Lopetegui Agote
19
+ - source_sentence: دينا بنت عبد الحميد
20
+ sentences:
21
+ - Alexia van Amsberg
22
+ - Anthony Nicholas Colin Maitland Biddulph, 5th Baron Biddulph
23
+ - Dina bint Abdul-Hamíd
24
+ - source_sentence: Մուհամեդ բեն Նաիֆ Ալ Սաուդ
25
+ sentences:
26
+ - Karpov Anatoly Evgenyevich
27
+ - GNPower Mariveles Coal Plant [former]
28
+ - Muhammed bin Nayef bin Abdul Aziz Al Saud
29
+ - source_sentence: Edward Gnehm
30
+ sentences:
31
+ - Шауэрте, Хартмут
32
+ - Ханзада Филипп, Эдинбург герцогі
33
+ - AFX
34
+ - source_sentence: Schori i Lidingö
35
+ sentences:
36
+ - Yordan Canev
37
+ - ကားပေါ့ အန်နာတိုလီ
38
+ - BYSTROV, Mikhail Ivanovich
39
+ pipeline_tag: sentence-similarity
40
+ library_name: sentence-transformers
41
+ metrics:
42
+ - cosine_accuracy
43
+ - cosine_accuracy_threshold
44
+ - cosine_f1
45
+ - cosine_f1_threshold
46
+ - cosine_precision
47
+ - cosine_recall
48
+ - cosine_ap
49
+ - cosine_mcc
50
+ model-index:
51
+ - name: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2-name-matcher-original
52
+ results:
53
+ - task:
54
+ type: binary-classification
55
+ name: Binary Classification
56
+ dataset:
57
+ name: sentence transformers paraphrase multilingual MiniLM L12 v2
58
+ type: sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2
59
+ metrics:
60
+ - type: cosine_accuracy
61
+ value: 0.9817931272716349
62
+ name: Cosine Accuracy
63
+ - type: cosine_accuracy_threshold
64
+ value: 0.7197962999343872
65
+ name: Cosine Accuracy Threshold
66
+ - type: cosine_f1
67
+ value: 0.9722373310278887
68
+ name: Cosine F1
69
+ - type: cosine_f1_threshold
70
+ value: 0.7091608047485352
71
+ name: Cosine F1 Threshold
72
+ - type: cosine_precision
73
+ value: 0.9675121928984912
74
+ name: Cosine Precision
75
+ - type: cosine_recall
76
+ value: 0.9770088489465266
77
+ name: Cosine Recall
78
+ - type: cosine_ap
79
+ value: 0.9944127523785896
80
+ name: Cosine Ap
81
+ - type: cosine_mcc
82
+ value: 0.9587183163648803
83
+ name: Cosine Mcc
84
+ ---
85
+
86
+ # sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2-name-matcher-original
87
+
88
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
89
+
90
+ ## Model Details
91
+
92
+ ### Model Description
93
+ - **Model Type:** Sentence Transformer
94
+ - **Base model:** [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) <!-- at revision 86741b4e3f5cb7765a600d3a3d55a0f6a6cb443d -->
95
+ - **Maximum Sequence Length:** 128 tokens
96
+ - **Output Dimensionality:** 384 dimensions
97
+ - **Similarity Function:** Cosine Similarity
98
+ <!-- - **Training Dataset:** Unknown -->
99
+ - **Language:** en
100
+ - **License:** apache-2.0
101
+
102
+ ### Model Sources
103
+
104
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
105
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
106
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
107
+
108
+ ### Full Model Architecture
109
+
110
+ ```
111
+ SentenceTransformer(
112
+ (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel
113
+ (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
114
+ )
115
+ ```
116
+
117
+ ## Usage
118
+
119
+ ### Direct Usage (Sentence Transformers)
120
+
121
+ First install the Sentence Transformers library:
122
+
123
+ ```bash
124
+ pip install -U sentence-transformers
125
+ ```
126
+
127
+ Then you can load this model and run inference.
128
+ ```python
129
+ from sentence_transformers import SentenceTransformer
130
+
131
+ # Download from the 🤗 Hub
132
+ model = SentenceTransformer("sentence_transformers_model_id")
133
+ # Run inference
134
+ sentences = [
135
+ 'Schori i Lidingö',
136
+ 'Yordan Canev',
137
+ 'ကားပေါ့ အန်နာတိုလီ',
138
+ ]
139
+ embeddings = model.encode(sentences)
140
+ print(embeddings.shape)
141
+ # [3, 384]
142
+
143
+ # Get the similarity scores for the embeddings
144
+ similarities = model.similarity(embeddings, embeddings)
145
+ print(similarities.shape)
146
+ # [3, 3]
147
+ ```
148
+
149
+ <!--
150
+ ### Direct Usage (Transformers)
151
+
152
+ <details><summary>Click to see the direct usage in Transformers</summary>
153
+
154
+ </details>
155
+ -->
156
+
157
+ <!--
158
+ ### Downstream Usage (Sentence Transformers)
159
+
160
+ You can finetune this model on your own dataset.
161
+
162
+ <details><summary>Click to expand</summary>
163
+
164
+ </details>
165
+ -->
166
+
167
+ <!--
168
+ ### Out-of-Scope Use
169
+
170
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
171
+ -->
172
+
173
+ ## Evaluation
174
+
175
+ ### Metrics
176
+
177
+ #### Binary Classification
178
+
179
+ * Dataset: `sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2`
180
+ * Evaluated with [<code>BinaryClassificationEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.BinaryClassificationEvaluator)
181
+
182
+ | Metric | Value |
183
+ |:--------------------------|:-----------|
184
+ | cosine_accuracy | 0.9818 |
185
+ | cosine_accuracy_threshold | 0.7198 |
186
+ | cosine_f1 | 0.9722 |
187
+ | cosine_f1_threshold | 0.7092 |
188
+ | cosine_precision | 0.9675 |
189
+ | cosine_recall | 0.977 |
190
+ | **cosine_ap** | **0.9944** |
191
+ | cosine_mcc | 0.9587 |
192
+
193
+ <!--
194
+ ## Bias, Risks and Limitations
195
+
196
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
197
+ -->
198
+
199
+ <!--
200
+ ### Recommendations
201
+
202
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
203
+ -->
204
+
205
+ ## Training Details
206
+
207
+ ### Training Dataset
208
+
209
+ #### Unnamed Dataset
210
+
211
+ * Size: 2,130,621 training samples
212
+ * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
213
+ * Approximate statistics based on the first 1000 samples:
214
+ | | sentence1 | sentence2 | label |
215
+ |:--------|:---------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|:---------------------------------------------------------------|
216
+ | type | string | string | float |
217
+ | details | <ul><li>min: 3 tokens</li><li>mean: 9.32 tokens</li><li>max: 57 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 9.16 tokens</li><li>max: 54 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.34</li><li>max: 1.0</li></ul> |
218
+ * Samples:
219
+ | sentence1 | sentence2 | label |
220
+ |:----------------------------------|:------------------------------------|:-----------------|
221
+ | <code>캐스린 설리번</code> | <code>Kathryn D. Sullivanová</code> | <code>1.0</code> |
222
+ | <code>ଶିବରାଜ ଅଧାଲରାଓ ପାଟିଲ</code> | <code>Aleksander Lubocki</code> | <code>0.0</code> |
223
+ | <code>Пырванов, Георги</code> | <code>アナトーリー・セルジュコフ</code> | <code>0.0</code> |
224
+ * Loss: [<code>ContrastiveLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#contrastiveloss) with these parameters:
225
+ ```json
226
+ {
227
+ "distance_metric": "SiameseDistanceMetric.COSINE_DISTANCE",
228
+ "margin": 0.5,
229
+ "size_average": true
230
+ }
231
+ ```
232
+
233
+ ### Evaluation Dataset
234
+
235
+ #### Unnamed Dataset
236
+
237
+ * Size: 2,663,276 evaluation samples
238
+ * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
239
+ * Approximate statistics based on the first 1000 samples:
240
+ | | sentence1 | sentence2 | label |
241
+ |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:---------------------------------------------------------------|
242
+ | type | string | string | float |
243
+ | details | <ul><li>min: 3 tokens</li><li>mean: 9.34 tokens</li><li>max: 102 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 9.11 tokens</li><li>max: 100 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.33</li><li>max: 1.0</li></ul> |
244
+ * Samples:
245
+ | sentence1 | sentence2 | label |
246
+ |:--------------------------------------|:---------------------------------------|:-----------------|
247
+ | <code>Ева Херман</code> | <code>I Xuan Karlos</code> | <code>0.0</code> |
248
+ | <code>Кличков Андрій Євгенович</code> | <code>Андрэй Яўгенавіч Клычкоў</code> | <code>1.0</code> |
249
+ | <code>Кинах А.</code> | <code>Senator John Hickenlooper</code> | <code>0.0</code> |
250
+ * Loss: [<code>ContrastiveLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#contrastiveloss) with these parameters:
251
+ ```json
252
+ {
253
+ "distance_metric": "SiameseDistanceMetric.COSINE_DISTANCE",
254
+ "margin": 0.5,
255
+ "size_average": true
256
+ }
257
+ ```
258
+
259
+ ### Training Hyperparameters
260
+ #### Non-Default Hyperparameters
261
+
262
+ - `eval_strategy`: steps
263
+ - `per_device_train_batch_size`: 1000
264
+ - `per_device_eval_batch_size`: 1000
265
+ - `gradient_accumulation_steps`: 4
266
+ - `learning_rate`: 3e-05
267
+ - `weight_decay`: 0.01
268
+ - `num_train_epochs`: 8
269
+ - `warmup_ratio`: 0.1
270
+ - `fp16_opt_level`: O0
271
+ - `load_best_model_at_end`: True
272
+ - `optim`: adafactor
273
+
274
+ #### All Hyperparameters
275
+ <details><summary>Click to expand</summary>
276
+
277
+ - `overwrite_output_dir`: False
278
+ - `do_predict`: False
279
+ - `eval_strategy`: steps
280
+ - `prediction_loss_only`: True
281
+ - `per_device_train_batch_size`: 1000
282
+ - `per_device_eval_batch_size`: 1000
283
+ - `per_gpu_train_batch_size`: None
284
+ - `per_gpu_eval_batch_size`: None
285
+ - `gradient_accumulation_steps`: 4
286
+ - `eval_accumulation_steps`: None
287
+ - `torch_empty_cache_steps`: None
288
+ - `learning_rate`: 3e-05
289
+ - `weight_decay`: 0.01
290
+ - `adam_beta1`: 0.9
291
+ - `adam_beta2`: 0.999
292
+ - `adam_epsilon`: 1e-08
293
+ - `max_grad_norm`: 1.0
294
+ - `num_train_epochs`: 8
295
+ - `max_steps`: -1
296
+ - `lr_scheduler_type`: linear
297
+ - `lr_scheduler_kwargs`: {}
298
+ - `warmup_ratio`: 0.1
299
+ - `warmup_steps`: 0
300
+ - `log_level`: passive
301
+ - `log_level_replica`: warning
302
+ - `log_on_each_node`: True
303
+ - `logging_nan_inf_filter`: True
304
+ - `save_safetensors`: True
305
+ - `save_on_each_node`: False
306
+ - `save_only_model`: False
307
+ - `restore_callback_states_from_checkpoint`: False
308
+ - `no_cuda`: False
309
+ - `use_cpu`: False
310
+ - `use_mps_device`: False
311
+ - `seed`: 42
312
+ - `data_seed`: None
313
+ - `jit_mode_eval`: False
314
+ - `use_ipex`: False
315
+ - `bf16`: False
316
+ - `fp16`: False
317
+ - `fp16_opt_level`: O0
318
+ - `half_precision_backend`: auto
319
+ - `bf16_full_eval`: False
320
+ - `fp16_full_eval`: False
321
+ - `tf32`: None
322
+ - `local_rank`: 0
323
+ - `ddp_backend`: None
324
+ - `tpu_num_cores`: None
325
+ - `tpu_metrics_debug`: False
326
+ - `debug`: []
327
+ - `dataloader_drop_last`: False
328
+ - `dataloader_num_workers`: 0
329
+ - `dataloader_prefetch_factor`: None
330
+ - `past_index`: -1
331
+ - `disable_tqdm`: False
332
+ - `remove_unused_columns`: True
333
+ - `label_names`: None
334
+ - `load_best_model_at_end`: True
335
+ - `ignore_data_skip`: False
336
+ - `fsdp`: []
337
+ - `fsdp_min_num_params`: 0
338
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
339
+ - `tp_size`: 0
340
+ - `fsdp_transformer_layer_cls_to_wrap`: None
341
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
342
+ - `deepspeed`: None
343
+ - `label_smoothing_factor`: 0.0
344
+ - `optim`: adafactor
345
+ - `optim_args`: None
346
+ - `adafactor`: False
347
+ - `group_by_length`: False
348
+ - `length_column_name`: length
349
+ - `ddp_find_unused_parameters`: None
350
+ - `ddp_bucket_cap_mb`: None
351
+ - `ddp_broadcast_buffers`: False
352
+ - `dataloader_pin_memory`: True
353
+ - `dataloader_persistent_workers`: False
354
+ - `skip_memory_metrics`: True
355
+ - `use_legacy_prediction_loop`: False
356
+ - `push_to_hub`: False
357
+ - `resume_from_checkpoint`: None
358
+ - `hub_model_id`: None
359
+ - `hub_strategy`: every_save
360
+ - `hub_private_repo`: None
361
+ - `hub_always_push`: False
362
+ - `gradient_checkpointing`: False
363
+ - `gradient_checkpointing_kwargs`: None
364
+ - `include_inputs_for_metrics`: False
365
+ - `include_for_metrics`: []
366
+ - `eval_do_concat_batches`: True
367
+ - `fp16_backend`: auto
368
+ - `push_to_hub_model_id`: None
369
+ - `push_to_hub_organization`: None
370
+ - `mp_parameters`:
371
+ - `auto_find_batch_size`: False
372
+ - `full_determinism`: False
373
+ - `torchdynamo`: None
374
+ - `ray_scope`: last
375
+ - `ddp_timeout`: 1800
376
+ - `torch_compile`: False
377
+ - `torch_compile_backend`: None
378
+ - `torch_compile_mode`: None
379
+ - `include_tokens_per_second`: False
380
+ - `include_num_input_tokens_seen`: False
381
+ - `neftune_noise_alpha`: None
382
+ - `optim_target_modules`: None
383
+ - `batch_eval_metrics`: False
384
+ - `eval_on_start`: False
385
+ - `use_liger_kernel`: False
386
+ - `eval_use_gather_object`: False
387
+ - `average_tokens_across_devices`: False
388
+ - `prompts`: None
389
+ - `batch_sampler`: batch_sampler
390
+ - `multi_dataset_batch_sampler`: proportional
391
+
392
+ </details>
393
+
394
+ ### Training Logs
395
+ | Epoch | Step | Training Loss | Validation Loss | sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap |
396
+ |:------:|:----:|:-------------:|:---------------:|:---------------------------------------------------------------------:|
397
+ | -1 | -1 | - | - | 0.7140 |
398
+ | 0.1877 | 100 | - | 0.0125 | 0.8849 |
399
+ | 0.3754 | 200 | - | 0.0090 | 0.9369 |
400
+ | 0.5631 | 300 | - | 0.0068 | 0.9630 |
401
+ | 0.7508 | 400 | - | 0.0052 | 0.9774 |
402
+ | 0.9385 | 500 | 0.0409 | 0.0040 | 0.9845 |
403
+ | 1.1276 | 600 | - | 0.0033 | 0.9887 |
404
+ | 1.3153 | 700 | - | 0.0028 | 0.9911 |
405
+ | 1.5031 | 800 | - | 0.0026 | 0.9927 |
406
+ | 1.6908 | 900 | - | 0.0022 | 0.9938 |
407
+ | 1.8785 | 1000 | 0.0131 | 0.0022 | 0.9944 |
408
+
409
+
410
+ ### Framework Versions
411
+ - Python: 3.12.9
412
+ - Sentence Transformers: 3.4.1
413
+ - Transformers: 4.51.3
414
+ - PyTorch: 2.7.0+cu126
415
+ - Accelerate: 1.6.0
416
+ - Datasets: 3.6.0
417
+ - Tokenizers: 0.21.1
418
+
419
+ ## Citation
420
+
421
+ ### BibTeX
422
+
423
+ #### Sentence Transformers
424
+ ```bibtex
425
+ @inproceedings{reimers-2019-sentence-bert,
426
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
427
+ author = "Reimers, Nils and Gurevych, Iryna",
428
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
429
+ month = "11",
430
+ year = "2019",
431
+ publisher = "Association for Computational Linguistics",
432
+ url = "https://arxiv.org/abs/1908.10084",
433
+ }
434
+ ```
435
+
436
+ #### ContrastiveLoss
437
+ ```bibtex
438
+ @inproceedings{hadsell2006dimensionality,
439
+ author={Hadsell, R. and Chopra, S. and LeCun, Y.},
440
+ booktitle={2006 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR'06)},
441
+ title={Dimensionality Reduction by Learning an Invariant Mapping},
442
+ year={2006},
443
+ volume={2},
444
+ number={},
445
+ pages={1735-1742},
446
+ doi={10.1109/CVPR.2006.100}
447
+ }
448
+ ```
449
+
450
+ <!--
451
+ ## Glossary
452
+
453
+ *Clearly define terms in order to be accessible across audiences.*
454
+ -->
455
+
456
+ <!--
457
+ ## Model Card Authors
458
+
459
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
460
+ -->
461
+
462
+ <!--
463
+ ## Model Card Contact
464
+
465
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
466
+ -->
checkpoint-1000/config.json ADDED
@@ -0,0 +1,25 @@
1
+ {
2
+ "architectures": [
3
+ "BertModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 384,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 1536,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.51.3",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 250037
25
+ }
checkpoint-1000/config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.4.1",
4
+ "transformers": "4.51.3",
5
+ "pytorch": "2.7.0+cu126"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": "cosine"
10
+ }
checkpoint-1000/model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53cdf706594f9c2e35f539f5023cae863f9d5c0e8588348281d86e7ac79b4662
3
+ size 470637416
checkpoint-1000/modules.json ADDED
@@ -0,0 +1,14 @@
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c108fa814d36d19a8e9c702a9800909a0bdbcc7bbc32071418d14ec158efbaf5
3
+ size 1715019
checkpoint-1000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31530f34a96cd557f736a4c9e2dbdab66da89f3ee40e3c858c87c688d4a1b9a1
3
+ size 14645
checkpoint-1000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8985e0cd69062d78f38bea6c82894c697cf2eff7e9a24bf93fa0da194c1b5e7
3
+ size 1465
checkpoint-1000/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
1
+ {
2
+ "max_seq_length": 128,
3
+ "do_lower_case": false
4
+ }
checkpoint-1000/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
checkpoint-1000/tokenizer.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cad551d5600a84242d0973327029452a1e3672ba6313c2a3c3d69c4310e12719
3
+ size 17082987
checkpoint-1000/tokenizer_config.json ADDED
@@ -0,0 +1,65 @@
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "250001": {
36
+ "content": "<mask>",
37
+ "lstrip": true,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "<s>",
47
+ "do_lower_case": true,
48
+ "eos_token": "</s>",
49
+ "extra_special_tokens": {},
50
+ "mask_token": "<mask>",
51
+ "max_length": 128,
52
+ "model_max_length": 128,
53
+ "pad_to_multiple_of": null,
54
+ "pad_token": "<pad>",
55
+ "pad_token_type_id": 0,
56
+ "padding_side": "right",
57
+ "sep_token": "</s>",
58
+ "stride": 0,
59
+ "strip_accents": null,
60
+ "tokenize_chinese_chars": true,
61
+ "tokenizer_class": "BertTokenizer",
62
+ "truncation_side": "right",
63
+ "truncation_strategy": "longest_first",
64
+ "unk_token": "<unk>"
65
+ }
checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,217 @@
1
+ {
2
+ "best_global_step": 1000,
3
+ "best_metric": 0.002240537665784359,
4
+ "best_model_checkpoint": "data/fine-tuned-sbert-sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2-original-adafactor/checkpoint-1000",
5
+ "epoch": 1.8784608165180665,
6
+ "eval_steps": 100,
7
+ "global_step": 1000,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.18770530267480057,
14
+ "eval_loss": 0.012530049309134483,
15
+ "eval_runtime": 812.6802,
16
+ "eval_samples_per_second": 3277.151,
17
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.8778235859541618,
18
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7128396034240723,
19
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.8848748516159781,
20
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.812583495899967,
21
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.6880456209182739,
22
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.7185793630359445,
23
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.7900823930955021,
24
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.8364038065429271,
25
+ "eval_steps_per_second": 3.278,
26
+ "step": 100
27
+ },
28
+ {
29
+ "epoch": 0.37541060534960113,
30
+ "eval_loss": 0.009013425558805466,
31
+ "eval_runtime": 792.9843,
32
+ "eval_samples_per_second": 3358.548,
33
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9164113424048541,
34
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7378441095352173,
35
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9368603114664952,
36
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.8729798695775446,
37
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7272344827651978,
38
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.8103205315460159,
39
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.8605654745268148,
40
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.8857576838544123,
41
+ "eval_steps_per_second": 3.359,
42
+ "step": 200
43
+ },
44
+ {
45
+ "epoch": 0.5631159080244017,
46
+ "eval_loss": 0.006819029338657856,
47
+ "eval_runtime": 809.9704,
48
+ "eval_samples_per_second": 3288.115,
49
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9398298338890391,
50
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7449667453765869,
51
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9629957356284182,
52
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9088032597499417,
53
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7449667453765869,
54
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.864029341509194,
55
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.8990159430733201,
56
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9188060251084542,
57
+ "eval_steps_per_second": 3.289,
58
+ "step": 300
59
+ },
60
+ {
61
+ "epoch": 0.7508212106992023,
62
+ "eval_loss": 0.005150709766894579,
63
+ "eval_runtime": 797.9199,
64
+ "eval_samples_per_second": 3337.773,
65
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9560016220600163,
66
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7553268671035767,
67
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9774059659768239,
68
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9333702119012406,
69
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7449506521224976,
70
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9005457325671423,
71
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.916037892637527,
72
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9513710688929036,
73
+ "eval_steps_per_second": 3.339,
74
+ "step": 400
75
+ },
76
+ {
77
+ "epoch": 0.9385265133740028,
78
+ "grad_norm": 0.17396493256092072,
79
+ "learning_rate": 2.9428198433420364e-05,
80
+ "loss": 0.0409,
81
+ "step": 500
82
+ },
83
+ {
84
+ "epoch": 0.9385265133740028,
85
+ "eval_loss": 0.003973629325628281,
86
+ "eval_runtime": 809.4532,
87
+ "eval_samples_per_second": 3290.216,
88
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9655950557207654,
89
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7622435092926025,
90
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9845099503823473,
91
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9477742208778024,
92
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7535413503646851,
93
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9221773981286795,
94
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9367750202319935,
95
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9590347859107281,
96
+ "eval_steps_per_second": 3.291,
97
+ "step": 500
98
+ },
99
+ {
100
+ "epoch": 1.1276396058188645,
101
+ "eval_loss": 0.0032712339889258146,
102
+ "eval_runtime": 793.7573,
103
+ "eval_samples_per_second": 3355.277,
104
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9712722657775374,
105
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7610360383987427,
106
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9887055977101925,
107
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9564087809158087,
108
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7610177993774414,
109
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9350876149915242,
110
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9471753898932449,
111
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9658239646502422,
112
+ "eval_steps_per_second": 3.356,
113
+ "step": 600
114
+ },
115
+ {
116
+ "epoch": 1.3153449084936648,
117
+ "eval_loss": 0.0028166945558041334,
118
+ "eval_runtime": 815.1943,
119
+ "eval_samples_per_second": 3267.044,
120
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9751246583160614,
121
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7577522993087769,
122
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9911117019106511,
123
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9621558129059113,
124
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7424367666244507,
125
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.943665667488554,
126
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9536134909690983,
127
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9708525597505264,
128
+ "eval_steps_per_second": 3.268,
129
+ "step": 700
130
+ },
131
+ {
132
+ "epoch": 1.5030502111684654,
133
+ "eval_loss": 0.0026242006570100784,
134
+ "eval_runtime": 805.7115,
135
+ "eval_samples_per_second": 3305.496,
136
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9782673995974888,
137
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7254683971405029,
138
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9927214598054878,
139
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9669240257663667,
140
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7145971059799194,
141
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9507846488068235,
142
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9597660102710608,
143
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9741896137072368,
144
+ "eval_steps_per_second": 3.306,
145
+ "step": 800
146
+ },
147
+ {
148
+ "epoch": 1.690755513843266,
149
+ "eval_loss": 0.002248650649562478,
150
+ "eval_runtime": 818.5338,
151
+ "eval_samples_per_second": 3253.715,
152
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9801973506353069,
153
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7349117994308472,
154
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9938133122786723,
155
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9698356230196407,
156
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7348856329917908,
157
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9551340483533577,
158
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9641228578901284,
159
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9756164919507957,
160
+ "eval_steps_per_second": 3.255,
161
+ "step": 900
162
+ },
163
+ {
164
+ "epoch": 1.8784608165180665,
165
+ "grad_norm": 0.07541557401418686,
166
+ "learning_rate": 2.5511749347258486e-05,
167
+ "loss": 0.0131,
168
+ "step": 1000
169
+ },
170
+ {
171
+ "epoch": 1.8784608165180665,
172
+ "eval_loss": 0.002240537665784359,
173
+ "eval_runtime": 803.6286,
174
+ "eval_samples_per_second": 3314.063,
175
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9817931272716349,
176
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7197962999343872,
177
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9944127523785896,
178
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9722373310278887,
179
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7091608047485352,
180
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9587183163648803,
181
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9675121928984912,
182
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9770088489465266,
183
+ "eval_steps_per_second": 3.315,
184
+ "step": 1000
185
+ }
186
+ ],
187
+ "logging_steps": 500,
188
+ "max_steps": 4256,
189
+ "num_input_tokens_seen": 0,
190
+ "num_train_epochs": 8,
191
+ "save_steps": 100,
192
+ "stateful_callbacks": {
193
+ "EarlyStoppingCallback": {
194
+ "args": {
195
+ "early_stopping_patience": 1,
196
+ "early_stopping_threshold": 0.0
197
+ },
198
+ "attributes": {
199
+ "early_stopping_patience_counter": 0
200
+ }
201
+ },
202
+ "TrainerControl": {
203
+ "args": {
204
+ "should_epoch_stop": false,
205
+ "should_evaluate": false,
206
+ "should_log": false,
207
+ "should_save": true,
208
+ "should_training_stop": false
209
+ },
210
+ "attributes": {}
211
+ }
212
+ },
213
+ "total_flos": 0.0,
214
+ "train_batch_size": 1000,
215
+ "trial_name": null,
216
+ "trial_params": null
217
+ }
checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9339753774865faea550d7da93688221ca0f43171c16e3034645a2149992c8a6
+ size 6033
checkpoint-1000/unigram.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:da145b5e7700ae40f16691ec32a0b1fdc1ee3298db22a31ea55f57a966c4a65d
+ size 14763260
checkpoint-1100/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+ {
+ "word_embedding_dimension": 384,
+ "pooling_mode_cls_token": false,
+ "pooling_mode_mean_tokens": true,
+ "pooling_mode_max_tokens": false,
+ "pooling_mode_mean_sqrt_len_tokens": false,
+ "pooling_mode_weightedmean_tokens": false,
+ "pooling_mode_lasttoken": false,
+ "include_prompt": true
+ }
checkpoint-1100/README.md ADDED
@@ -0,0 +1,467 @@
+ ---
+ language:
+ - en
+ license: apache-2.0
+ tags:
+ - sentence-transformers
+ - sentence-similarity
+ - feature-extraction
+ - generated_from_trainer
+ - dataset_size:2130621
+ - loss:ContrastiveLoss
+ base_model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
+ widget:
+ - source_sentence: Kim Chol-sam
+ sentences:
+ - Stankevich Sergey Nikolayevich
+ - Kim Chin-So’k
+ - Julen Lopetegui Agote
+ - source_sentence: دينا بنت عبد الحميد
+ sentences:
+ - Alexia van Amsberg
+ - Anthony Nicholas Colin Maitland Biddulph, 5th Baron Biddulph
+ - Dina bint Abdul-Hamíd
+ - source_sentence: Մուհամեդ բեն Նաիֆ Ալ Սաուդ
+ sentences:
+ - Karpov Anatoly Evgenyevich
+ - GNPower Mariveles Coal Plant [former]
+ - Muhammed bin Nayef bin Abdul Aziz Al Saud
+ - source_sentence: Edward Gnehm
+ sentences:
+ - Шауэрте, Хартмут
+ - Ханзада Филипп, Эдинбург герцогі
+ - AFX
+ - source_sentence: Schori i Lidingö
+ sentences:
+ - Yordan Canev
+ - ကားပေါ့ အန်နာတိုလီ
+ - BYSTROV, Mikhail Ivanovich
+ pipeline_tag: sentence-similarity
+ library_name: sentence-transformers
+ metrics:
+ - cosine_accuracy
+ - cosine_accuracy_threshold
+ - cosine_f1
+ - cosine_f1_threshold
+ - cosine_precision
+ - cosine_recall
+ - cosine_ap
+ - cosine_mcc
+ model-index:
+ - name: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2-name-matcher-original
+ results:
+ - task:
+ type: binary-classification
+ name: Binary Classification
+ dataset:
+ name: sentence transformers paraphrase multilingual MiniLM L12 v2
+ type: sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2
+ metrics:
+ - type: cosine_accuracy
+ value: 0.9828594815415578
+ name: Cosine Accuracy
+ - type: cosine_accuracy_threshold
+ value: 0.7552986741065979
+ name: Cosine Accuracy Threshold
+ - type: cosine_f1
+ value: 0.973889221813201
+ name: Cosine F1
+ - type: cosine_f1_threshold
+ value: 0.7401974201202393
+ name: Cosine F1 Threshold
+ - type: cosine_precision
+ value: 0.9661201195760486
+ name: Cosine Precision
+ - type: cosine_recall
+ value: 0.9817842882294052
+ name: Cosine Recall
+ - type: cosine_ap
+ value: 0.9950493119597241
+ name: Cosine Ap
+ - type: cosine_mcc
+ value: 0.9611601510291333
+ name: Cosine Mcc
+ ---
+
+ # sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2-name-matcher-original
+
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+
+ ## Model Details
+
+ ### Model Description
+ - **Model Type:** Sentence Transformer
+ - **Base model:** [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) <!-- at revision 86741b4e3f5cb7765a600d3a3d55a0f6a6cb443d -->
+ - **Maximum Sequence Length:** 128 tokens
+ - **Output Dimensionality:** 384 dimensions
+ - **Similarity Function:** Cosine Similarity
+ <!-- - **Training Dataset:** Unknown -->
+ - **Language:** en
+ - **License:** apache-2.0
+
+ ### Model Sources
+
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
+
+ ### Full Model Architecture
+
+ ```
+ SentenceTransformer(
+ (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel
+ (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+ )
+ ```
+
+ ## Usage
+
+ ### Direct Usage (Sentence Transformers)
+
+ First install the Sentence Transformers library:
+
+ ```bash
+ pip install -U sentence-transformers
+ ```
+
+ Then you can load this model and run inference.
+ ```python
+ from sentence_transformers import SentenceTransformer
+
+ # Download from the 🤗 Hub
+ model = SentenceTransformer("sentence_transformers_model_id")
+ # Run inference
+ sentences = [
+ 'Schori i Lidingö',
+ 'Yordan Canev',
+ 'ကားပေါ့ အန်နာတိုလီ',
+ ]
+ embeddings = model.encode(sentences)
+ print(embeddings.shape)
+ # [3, 384]
+
+ # Get the similarity scores for the embeddings
+ similarities = model.similarity(embeddings, embeddings)
+ print(similarities.shape)
+ # [3, 3]
+ ```
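+
+ For pairwise name matching, a minimal sketch follows (editor-added illustration, not part of the generated card): the model id is the same placeholder as above, and the 0.74 threshold approximates the `cosine_f1_threshold` reported in the Metrics section below.
+
+ ```python
+ from sentence_transformers import SentenceTransformer
+
+ # Placeholder id; substitute the id this checkpoint is actually published under.
+ model = SentenceTransformer("sentence_transformers_model_id")
+
+ def is_same_entity(name_a: str, name_b: str, threshold: float = 0.74) -> bool:
+     # Encode both names and compare their cosine similarity against a tuned threshold.
+     embeddings = model.encode([name_a, name_b])
+     score = model.similarity(embeddings[0:1], embeddings[1:2]).item()
+     return score >= threshold
+
+ print(is_same_entity("Kim Chol-sam", "Kim Chin-So’k"))
+ ```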
+
+ <!--
+ ### Direct Usage (Transformers)
+
+ <details><summary>Click to see the direct usage in Transformers</summary>
+
+ </details>
+ -->
+
+ <!--
+ ### Downstream Usage (Sentence Transformers)
+
+ You can finetune this model on your own dataset.
+
+ <details><summary>Click to expand</summary>
+
+ </details>
+ -->
+
+ <!--
+ ### Out-of-Scope Use
+
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
+ -->
+
+ ## Evaluation
+
+ ### Metrics
+
+ #### Binary Classification
+
+ * Dataset: `sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2`
+ * Evaluated with [<code>BinaryClassificationEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.BinaryClassificationEvaluator)
+
+ | Metric | Value |
+ |:--------------------------|:----------|
+ | cosine_accuracy | 0.9829 |
+ | cosine_accuracy_threshold | 0.7553 |
+ | cosine_f1 | 0.9739 |
+ | cosine_f1_threshold | 0.7402 |
+ | cosine_precision | 0.9661 |
+ | cosine_recall | 0.9818 |
+ | **cosine_ap** | **0.995** |
+ | cosine_mcc | 0.9612 |
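+
+ A minimal sketch of computing such metrics with the evaluator (editor-added illustration; the model id is the placeholder from above and the name pairs are stand-ins, not the actual evaluation split):
+
+ ```python
+ from sentence_transformers import SentenceTransformer
+ from sentence_transformers.evaluation import BinaryClassificationEvaluator
+
+ model = SentenceTransformer("sentence_transformers_model_id")  # placeholder id
+
+ # label 1 = same entity, label 0 = different entity
+ evaluator = BinaryClassificationEvaluator(
+     sentences1=["Kim Chol-sam", "Ева Херман"],
+     sentences2=["Kim Chin-So’k", "I Xuan Karlos"],
+     labels=[1, 0],
+     name="sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2",
+ )
+ results = evaluator(model)  # dict keyed by metric name, e.g. "..._cosine_ap"
+ ```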
+
+ <!--
+ ## Bias, Risks and Limitations
+
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
+ -->
+
+ <!--
+ ### Recommendations
+
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
+ -->
+
+ ## Training Details
+
+ ### Training Dataset
+
+ #### Unnamed Dataset
+
+ * Size: 2,130,621 training samples
+ * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
+ * Approximate statistics based on the first 1000 samples:
+ | | sentence1 | sentence2 | label |
+ |:--------|:---------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|:---------------------------------------------------------------|
+ | type | string | string | float |
+ | details | <ul><li>min: 3 tokens</li><li>mean: 9.32 tokens</li><li>max: 57 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 9.16 tokens</li><li>max: 54 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.34</li><li>max: 1.0</li></ul> |
+ * Samples:
+ | sentence1 | sentence2 | label |
+ |:----------------------------------|:------------------------------------|:-----------------|
+ | <code>캐스린 설리번</code> | <code>Kathryn D. Sullivanová</code> | <code>1.0</code> |
+ | <code>ଶିବରାଜ ଅଧାଲରାଓ ପାଟିଲ</code> | <code>Aleksander Lubocki</code> | <code>0.0</code> |
+ | <code>Пырванов, Георги</code> | <code>アナトーリー・セルジュコフ</code> | <code>0.0</code> |
+ * Loss: [<code>ContrastiveLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#contrastiveloss) with these parameters:
+ ```json
+ {
+ "distance_metric": "SiameseDistanceMetric.COSINE_DISTANCE",
+ "margin": 0.5,
+ "size_average": true
+ }
+ ```
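+
+ These parameters correspond to how the loss would be constructed in code; a minimal sketch (editor-added illustration, mirroring the margin and size_average values in the JSON above):
+
+ ```python
+ from sentence_transformers import SentenceTransformer, losses
+
+ model = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
+ # Cosine distance with a 0.5 margin, averaged over the batch.
+ loss = losses.ContrastiveLoss(
+     model=model,
+     distance_metric=losses.SiameseDistanceMetric.COSINE_DISTANCE,
+     margin=0.5,
+     size_average=True,
+ )
+ ```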
+
+ ### Evaluation Dataset
+
+ #### Unnamed Dataset
+
+ * Size: 2,663,276 evaluation samples
+ * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
+ * Approximate statistics based on the first 1000 samples:
+ | | sentence1 | sentence2 | label |
+ |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:---------------------------------------------------------------|
+ | type | string | string | float |
+ | details | <ul><li>min: 3 tokens</li><li>mean: 9.34 tokens</li><li>max: 102 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 9.11 tokens</li><li>max: 100 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.33</li><li>max: 1.0</li></ul> |
+ * Samples:
+ | sentence1 | sentence2 | label |
+ |:--------------------------------------|:---------------------------------------|:-----------------|
+ | <code>Ева Херман</code> | <code>I Xuan Karlos</code> | <code>0.0</code> |
+ | <code>Кличков Андрій Євгенович</code> | <code>Андрэй Яўгенавіч Клычкоў</code> | <code>1.0</code> |
+ | <code>Кинах А.</code> | <code>Senator John Hickenlooper</code> | <code>0.0</code> |
+ * Loss: [<code>ContrastiveLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#contrastiveloss) with these parameters:
+ ```json
+ {
+ "distance_metric": "SiameseDistanceMetric.COSINE_DISTANCE",
+ "margin": 0.5,
+ "size_average": true
+ }
+ ```
+
+ ### Training Hyperparameters
+ #### Non-Default Hyperparameters
+
+ - `eval_strategy`: steps
+ - `per_device_train_batch_size`: 1000
+ - `per_device_eval_batch_size`: 1000
+ - `gradient_accumulation_steps`: 4
+ - `learning_rate`: 3e-05
+ - `weight_decay`: 0.01
+ - `num_train_epochs`: 8
+ - `warmup_ratio`: 0.1
+ - `fp16_opt_level`: O0
+ - `load_best_model_at_end`: True
+ - `optim`: adafactor
+
+ #### All Hyperparameters
+ <details><summary>Click to expand</summary>
+
+ - `overwrite_output_dir`: False
+ - `do_predict`: False
+ - `eval_strategy`: steps
+ - `prediction_loss_only`: True
+ - `per_device_train_batch_size`: 1000
+ - `per_device_eval_batch_size`: 1000
+ - `per_gpu_train_batch_size`: None
+ - `per_gpu_eval_batch_size`: None
+ - `gradient_accumulation_steps`: 4
+ - `eval_accumulation_steps`: None
+ - `torch_empty_cache_steps`: None
+ - `learning_rate`: 3e-05
+ - `weight_decay`: 0.01
+ - `adam_beta1`: 0.9
+ - `adam_beta2`: 0.999
+ - `adam_epsilon`: 1e-08
+ - `max_grad_norm`: 1.0
+ - `num_train_epochs`: 8
+ - `max_steps`: -1
+ - `lr_scheduler_type`: linear
+ - `lr_scheduler_kwargs`: {}
+ - `warmup_ratio`: 0.1
+ - `warmup_steps`: 0
+ - `log_level`: passive
+ - `log_level_replica`: warning
+ - `log_on_each_node`: True
+ - `logging_nan_inf_filter`: True
+ - `save_safetensors`: True
+ - `save_on_each_node`: False
+ - `save_only_model`: False
+ - `restore_callback_states_from_checkpoint`: False
+ - `no_cuda`: False
+ - `use_cpu`: False
+ - `use_mps_device`: False
+ - `seed`: 42
+ - `data_seed`: None
+ - `jit_mode_eval`: False
+ - `use_ipex`: False
+ - `bf16`: False
+ - `fp16`: False
+ - `fp16_opt_level`: O0
+ - `half_precision_backend`: auto
+ - `bf16_full_eval`: False
+ - `fp16_full_eval`: False
+ - `tf32`: None
+ - `local_rank`: 0
+ - `ddp_backend`: None
+ - `tpu_num_cores`: None
+ - `tpu_metrics_debug`: False
+ - `debug`: []
+ - `dataloader_drop_last`: False
+ - `dataloader_num_workers`: 0
+ - `dataloader_prefetch_factor`: None
+ - `past_index`: -1
+ - `disable_tqdm`: False
+ - `remove_unused_columns`: True
+ - `label_names`: None
+ - `load_best_model_at_end`: True
+ - `ignore_data_skip`: False
+ - `fsdp`: []
+ - `fsdp_min_num_params`: 0
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
+ - `tp_size`: 0
+ - `fsdp_transformer_layer_cls_to_wrap`: None
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
+ - `deepspeed`: None
+ - `label_smoothing_factor`: 0.0
+ - `optim`: adafactor
+ - `optim_args`: None
+ - `adafactor`: False
+ - `group_by_length`: False
+ - `length_column_name`: length
+ - `ddp_find_unused_parameters`: None
+ - `ddp_bucket_cap_mb`: None
+ - `ddp_broadcast_buffers`: False
+ - `dataloader_pin_memory`: True
+ - `dataloader_persistent_workers`: False
+ - `skip_memory_metrics`: True
+ - `use_legacy_prediction_loop`: False
+ - `push_to_hub`: False
+ - `resume_from_checkpoint`: None
+ - `hub_model_id`: None
+ - `hub_strategy`: every_save
+ - `hub_private_repo`: None
+ - `hub_always_push`: False
+ - `gradient_checkpointing`: False
+ - `gradient_checkpointing_kwargs`: None
+ - `include_inputs_for_metrics`: False
+ - `include_for_metrics`: []
+ - `eval_do_concat_batches`: True
+ - `fp16_backend`: auto
+ - `push_to_hub_model_id`: None
+ - `push_to_hub_organization`: None
+ - `mp_parameters`:
+ - `auto_find_batch_size`: False
+ - `full_determinism`: False
+ - `torchdynamo`: None
+ - `ray_scope`: last
+ - `ddp_timeout`: 1800
+ - `torch_compile`: False
+ - `torch_compile_backend`: None
+ - `torch_compile_mode`: None
+ - `include_tokens_per_second`: False
+ - `include_num_input_tokens_seen`: False
+ - `neftune_noise_alpha`: None
+ - `optim_target_modules`: None
+ - `batch_eval_metrics`: False
+ - `eval_on_start`: False
+ - `use_liger_kernel`: False
+ - `eval_use_gather_object`: False
+ - `average_tokens_across_devices`: False
+ - `prompts`: None
+ - `batch_sampler`: batch_sampler
+ - `multi_dataset_batch_sampler`: proportional
+
+ </details>
+
+ ### Training Logs
+ | Epoch | Step | Training Loss | Validation Loss | sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap |
+ |:------:|:----:|:-------------:|:---------------:|:---------------------------------------------------------------------:|
+ | -1 | -1 | - | - | 0.7140 |
+ | 0.1877 | 100 | - | 0.0125 | 0.8849 |
+ | 0.3754 | 200 | - | 0.0090 | 0.9369 |
+ | 0.5631 | 300 | - | 0.0068 | 0.9630 |
+ | 0.7508 | 400 | - | 0.0052 | 0.9774 |
+ | 0.9385 | 500 | 0.0409 | 0.0040 | 0.9845 |
+ | 1.1276 | 600 | - | 0.0033 | 0.9887 |
+ | 1.3153 | 700 | - | 0.0028 | 0.9911 |
+ | 1.5031 | 800 | - | 0.0026 | 0.9927 |
+ | 1.6908 | 900 | - | 0.0022 | 0.9938 |
+ | 1.8785 | 1000 | 0.0131 | 0.0022 | 0.9944 |
+ | 2.0676 | 1100 | - | 0.0019 | 0.9950 |
+
+
+ ### Framework Versions
+ - Python: 3.12.9
+ - Sentence Transformers: 3.4.1
+ - Transformers: 4.51.3
+ - PyTorch: 2.7.0+cu126
+ - Accelerate: 1.6.0
+ - Datasets: 3.6.0
+ - Tokenizers: 0.21.1
+
+ ## Citation
+
+ ### BibTeX
+
+ #### Sentence Transformers
+ ```bibtex
+ @inproceedings{reimers-2019-sentence-bert,
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+ author = "Reimers, Nils and Gurevych, Iryna",
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+ month = "11",
+ year = "2019",
+ publisher = "Association for Computational Linguistics",
+ url = "https://arxiv.org/abs/1908.10084",
+ }
+ ```
+
+ #### ContrastiveLoss
+ ```bibtex
+ @inproceedings{hadsell2006dimensionality,
+ author={Hadsell, R. and Chopra, S. and LeCun, Y.},
+ booktitle={2006 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR'06)},
+ title={Dimensionality Reduction by Learning an Invariant Mapping},
+ year={2006},
+ volume={2},
+ number={},
+ pages={1735-1742},
+ doi={10.1109/CVPR.2006.100}
+ }
+ ```
+
+ <!--
+ ## Glossary
+
+ *Clearly define terms in order to be accessible across audiences.*
+ -->
+
+ <!--
+ ## Model Card Authors
+
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+ -->
+
+ <!--
+ ## Model Card Contact
+
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+ -->
checkpoint-1100/config.json ADDED
@@ -0,0 +1,25 @@
+ {
+ "architectures": [
+ "BertModel"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "gradient_checkpointing": false,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 384,
+ "initializer_range": 0.02,
+ "intermediate_size": 1536,
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "torch_dtype": "float32",
+ "transformers_version": "4.51.3",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 250037
+ }
checkpoint-1100/config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
+ {
+ "__version__": {
+ "sentence_transformers": "3.4.1",
+ "transformers": "4.51.3",
+ "pytorch": "2.7.0+cu126"
+ },
+ "prompts": {},
+ "default_prompt_name": null,
+ "similarity_fn_name": "cosine"
+ }
checkpoint-1100/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:763540a5075ed486170f85323b5ee9b40182439ea8f51d889e8674424cce13c2
+ size 470637416
checkpoint-1100/modules.json ADDED
@@ -0,0 +1,14 @@
+ [
+ {
+ "idx": 0,
+ "name": "0",
+ "path": "",
+ "type": "sentence_transformers.models.Transformer"
+ },
+ {
+ "idx": 1,
+ "name": "1",
+ "path": "1_Pooling",
+ "type": "sentence_transformers.models.Pooling"
+ }
+ ]
checkpoint-1100/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e0711e7f2b2b4728583424781a68346e4cb105f82c5b3e33e835ff6603b1b546
+ size 1715019
checkpoint-1100/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d533d8579fdbb2634c2232f32ae13c2e79a071512c8f417a9f5453a5c0587c27
+ size 14645
checkpoint-1100/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2a3efc25b9c32ace074d8642ed698ba4f27854c75c4022587a44f288f2399a9b
+ size 1465
checkpoint-1100/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+ "max_seq_length": 128,
+ "do_lower_case": false
+ }
checkpoint-1100/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
+ {
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "cls_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "mask_token": {
+ "content": "<mask>",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<pad>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "sep_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
checkpoint-1100/tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cad551d5600a84242d0973327029452a1e3672ba6313c2a3c3d69c4310e12719
+ size 17082987
checkpoint-1100/tokenizer_config.json ADDED
@@ -0,0 +1,65 @@
+ {
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<pad>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "<mask>",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<s>",
+ "clean_up_tokenization_spaces": false,
+ "cls_token": "<s>",
+ "do_lower_case": true,
+ "eos_token": "</s>",
+ "extra_special_tokens": {},
+ "mask_token": "<mask>",
+ "max_length": 128,
+ "model_max_length": 128,
+ "pad_to_multiple_of": null,
+ "pad_token": "<pad>",
+ "pad_token_type_id": 0,
+ "padding_side": "right",
+ "sep_token": "</s>",
+ "stride": 0,
+ "strip_accents": null,
+ "tokenize_chinese_chars": true,
+ "tokenizer_class": "BertTokenizer",
+ "truncation_side": "right",
+ "truncation_strategy": "longest_first",
+ "unk_token": "<unk>"
+ }
checkpoint-1100/trainer_state.json ADDED
@@ -0,0 +1,233 @@
+ {
+ "best_global_step": 1100,
+ "best_metric": 0.0018734760815277696,
+ "best_model_checkpoint": "data/fine-tuned-sbert-sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2-original-adafactor/checkpoint-1100",
+ "epoch": 2.0675739089629284,
+ "eval_steps": 100,
+ "global_step": 1100,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.18770530267480057,
+ "eval_loss": 0.012530049309134483,
+ "eval_runtime": 812.6802,
+ "eval_samples_per_second": 3277.151,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.8778235859541618,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7128396034240723,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.8848748516159781,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.812583495899967,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.6880456209182739,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.7185793630359445,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.7900823930955021,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.8364038065429271,
+ "eval_steps_per_second": 3.278,
+ "step": 100
+ },
+ {
+ "epoch": 0.37541060534960113,
+ "eval_loss": 0.009013425558805466,
+ "eval_runtime": 792.9843,
+ "eval_samples_per_second": 3358.548,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9164113424048541,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7378441095352173,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9368603114664952,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.8729798695775446,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7272344827651978,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.8103205315460159,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.8605654745268148,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.8857576838544123,
+ "eval_steps_per_second": 3.359,
+ "step": 200
+ },
+ {
+ "epoch": 0.5631159080244017,
+ "eval_loss": 0.006819029338657856,
+ "eval_runtime": 809.9704,
+ "eval_samples_per_second": 3288.115,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9398298338890391,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7449667453765869,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9629957356284182,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9088032597499417,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7449667453765869,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.864029341509194,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.8990159430733201,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9188060251084542,
+ "eval_steps_per_second": 3.289,
+ "step": 300
+ },
+ {
+ "epoch": 0.7508212106992023,
+ "eval_loss": 0.005150709766894579,
+ "eval_runtime": 797.9199,
+ "eval_samples_per_second": 3337.773,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9560016220600163,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7553268671035767,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9774059659768239,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9333702119012406,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7449506521224976,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9005457325671423,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.916037892637527,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9513710688929036,
+ "eval_steps_per_second": 3.339,
+ "step": 400
+ },
+ {
+ "epoch": 0.9385265133740028,
+ "grad_norm": 0.17396493256092072,
+ "learning_rate": 2.9428198433420364e-05,
+ "loss": 0.0409,
+ "step": 500
+ },
+ {
+ "epoch": 0.9385265133740028,
+ "eval_loss": 0.003973629325628281,
+ "eval_runtime": 809.4532,
+ "eval_samples_per_second": 3290.216,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9655950557207654,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7622435092926025,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9845099503823473,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9477742208778024,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7535413503646851,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9221773981286795,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9367750202319935,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9590347859107281,
+ "eval_steps_per_second": 3.291,
+ "step": 500
+ },
+ {
+ "epoch": 1.1276396058188645,
+ "eval_loss": 0.0032712339889258146,
+ "eval_runtime": 793.7573,
+ "eval_samples_per_second": 3355.277,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9712722657775374,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7610360383987427,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9887055977101925,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9564087809158087,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7610177993774414,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9350876149915242,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9471753898932449,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9658239646502422,
+ "eval_steps_per_second": 3.356,
+ "step": 600
+ },
+ {
+ "epoch": 1.3153449084936648,
+ "eval_loss": 0.0028166945558041334,
+ "eval_runtime": 815.1943,
+ "eval_samples_per_second": 3267.044,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9751246583160614,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7577522993087769,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9911117019106511,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9621558129059113,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7424367666244507,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.943665667488554,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9536134909690983,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9708525597505264,
+ "eval_steps_per_second": 3.268,
+ "step": 700
+ },
+ {
+ "epoch": 1.5030502111684654,
+ "eval_loss": 0.0026242006570100784,
+ "eval_runtime": 805.7115,
+ "eval_samples_per_second": 3305.496,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9782673995974888,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7254683971405029,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9927214598054878,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9669240257663667,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7145971059799194,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9507846488068235,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9597660102710608,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9741896137072368,
+ "eval_steps_per_second": 3.306,
+ "step": 800
+ },
+ {
+ "epoch": 1.690755513843266,
+ "eval_loss": 0.002248650649562478,
+ "eval_runtime": 818.5338,
+ "eval_samples_per_second": 3253.715,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9801973506353069,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7349117994308472,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9938133122786723,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9698356230196407,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7348856329917908,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9551340483533577,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9641228578901284,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9756164919507957,
+ "eval_steps_per_second": 3.255,
+ "step": 900
+ },
+ {
+ "epoch": 1.8784608165180665,
+ "grad_norm": 0.07541557401418686,
+ "learning_rate": 2.5511749347258486e-05,
+ "loss": 0.0131,
+ "step": 1000
+ },
+ {
+ "epoch": 1.8784608165180665,
+ "eval_loss": 0.002240537665784359,
+ "eval_runtime": 803.6286,
+ "eval_samples_per_second": 3314.063,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9817931272716349,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7197962999343872,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9944127523785896,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9722373310278887,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7091608047485352,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9587183163648803,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9675121928984912,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9770088489465266,
+ "eval_steps_per_second": 3.315,
+ "step": 1000
+ },
+ {
+ "epoch": 2.0675739089629284,
+ "eval_loss": 0.0018734760815277696,
+ "eval_runtime": 807.0812,
+ "eval_samples_per_second": 3299.886,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9828594815415578,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7552986741065979,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9950493119597241,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.973889221813201,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7401974201202393,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9611601510291333,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9661201195760486,
+ "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9817842882294052,
+ "eval_steps_per_second": 3.301,
+ "step": 1100
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 4256,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 8,
+ "save_steps": 100,
+ "stateful_callbacks": {
+ "EarlyStoppingCallback": {
+ "args": {
+ "early_stopping_patience": 1,
+ "early_stopping_threshold": 0.0
+ },
+ "attributes": {
+ "early_stopping_patience_counter": 0
+ }
+ },
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 0.0,
+ "train_batch_size": 1000,
+ "trial_name": null,
+ "trial_params": null
+ }
checkpoint-1100/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9339753774865faea550d7da93688221ca0f43171c16e3034645a2149992c8a6
+ size 6033
checkpoint-1100/unigram.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:da145b5e7700ae40f16691ec32a0b1fdc1ee3298db22a31ea55f57a966c4a65d
+ size 14763260
checkpoint-1200/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+ {
+ "word_embedding_dimension": 384,
+ "pooling_mode_cls_token": false,
+ "pooling_mode_mean_tokens": true,
+ "pooling_mode_max_tokens": false,
+ "pooling_mode_mean_sqrt_len_tokens": false,
+ "pooling_mode_weightedmean_tokens": false,
+ "pooling_mode_lasttoken": false,
+ "include_prompt": true
+ }
checkpoint-1200/README.md ADDED
@@ -0,0 +1,468 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ license: apache-2.0
5
+ tags:
6
+ - sentence-transformers
7
+ - sentence-similarity
8
+ - feature-extraction
9
+ - generated_from_trainer
10
+ - dataset_size:2130621
11
+ - loss:ContrastiveLoss
12
+ base_model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
13
+ widget:
14
+ - source_sentence: Kim Chol-sam
15
+ sentences:
16
+ - Stankevich Sergey Nikolayevich
17
+ - Kim Chin-So’k
18
+ - Julen Lopetegui Agote
19
+ - source_sentence: دينا بنت عبد الحميد
20
+ sentences:
21
+ - Alexia van Amsberg
22
+ - Anthony Nicholas Colin Maitland Biddulph, 5th Baron Biddulph
23
+ - Dina bint Abdul-Hamíd
24
+ - source_sentence: Մուհամեդ բեն Նաիֆ Ալ Սաուդ
25
+ sentences:
26
+ - Karpov Anatoly Evgenyevich
27
+ - GNPower Mariveles Coal Plant [former]
28
+ - Muhammed bin Nayef bin Abdul Aziz Al Saud
29
+ - source_sentence: Edward Gnehm
30
+ sentences:
31
+ - Шауэрте, Хартмут
32
+ - Ханзада Филипп, Эдинбург герцогі
33
+ - AFX
34
+ - source_sentence: Schori i Lidingö
35
+ sentences:
36
+ - Yordan Canev
37
+ - ကားပေါ့ အန်နာတိုလီ
38
+ - BYSTROV, Mikhail Ivanovich
39
+ pipeline_tag: sentence-similarity
40
+ library_name: sentence-transformers
41
+ metrics:
42
+ - cosine_accuracy
43
+ - cosine_accuracy_threshold
44
+ - cosine_f1
45
+ - cosine_f1_threshold
46
+ - cosine_precision
47
+ - cosine_recall
48
+ - cosine_ap
49
+ - cosine_mcc
50
+ model-index:
51
+ - name: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2-name-matcher-original
52
+ results:
53
+ - task:
54
+ type: binary-classification
55
+ name: Binary Classification
56
+ dataset:
57
+ name: sentence transformers paraphrase multilingual MiniLM L12 v2
58
+ type: sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2
59
+ metrics:
60
+ - type: cosine_accuracy
61
+ value: 0.9843050674356433
62
+ name: Cosine Accuracy
63
+ - type: cosine_accuracy_threshold
64
+ value: 0.742120623588562
65
+ name: Cosine Accuracy Threshold
66
+ - type: cosine_f1
67
+ value: 0.9760932477723254
68
+ name: Cosine F1
69
+ - type: cosine_f1_threshold
70
+ value: 0.742120623588562
71
+ name: Cosine F1 Threshold
72
+ - type: cosine_precision
73
+ value: 0.9703216856372878
74
+ name: Cosine Precision
75
+ - type: cosine_recall
76
+ value: 0.9819338803033267
77
+ name: Cosine Recall
78
+ - type: cosine_ap
79
+ value: 0.9955554741842152
80
+ name: Cosine Ap
81
+ - type: cosine_mcc
82
+ value: 0.964449493634366
83
+ name: Cosine Mcc
84
+ ---
85
+
86
+ # sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2-name-matcher-original
87
+
88
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
89
+
90
+ ## Model Details
91
+
92
+ ### Model Description
93
+ - **Model Type:** Sentence Transformer
94
+ - **Base model:** [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) <!-- at revision 86741b4e3f5cb7765a600d3a3d55a0f6a6cb443d -->
95
+ - **Maximum Sequence Length:** 128 tokens
96
+ - **Output Dimensionality:** 384 dimensions
97
+ - **Similarity Function:** Cosine Similarity
98
+ <!-- - **Training Dataset:** Unknown -->
99
+ - **Language:** en
100
+ - **License:** apache-2.0
101
+
102
+ ### Model Sources
103
+
104
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
105
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
106
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
107
+
108
+ ### Full Model Architecture
109
+
110
+ ```
111
+ SentenceTransformer(
112
+ (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel
113
+ (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
114
+ )
115
+ ```
116
+
117
+ ## Usage
118
+
119
+ ### Direct Usage (Sentence Transformers)
120
+
121
+ First install the Sentence Transformers library:
122
+
123
+ ```bash
124
+ pip install -U sentence-transformers
125
+ ```
126
+
127
+ Then you can load this model and run inference.
128
+ ```python
129
+ from sentence_transformers import SentenceTransformer
130
+
131
+ # Download from the 🤗 Hub
132
+ model = SentenceTransformer("sentence_transformers_model_id")
133
+ # Run inference
134
+ sentences = [
135
+ 'Schori i Lidingö',
136
+ 'Yordan Canev',
137
+ 'ကားပေါ့ အန်နာတိုလီ',
138
+ ]
139
+ embeddings = model.encode(sentences)
140
+ print(embeddings.shape)
141
+ # [3, 384]
142
+
143
+ # Get the similarity scores for the embeddings
144
+ similarities = model.similarity(embeddings, embeddings)
145
+ print(similarities.shape)
146
+ # [3, 3]
147
+ ```
148
+
+ <!--
+ ### Direct Usage (Transformers)
+
+ <details><summary>Click to see the direct usage in Transformers</summary>
+
+ </details>
+ -->
+
+ <!--
+ ### Downstream Usage (Sentence Transformers)
+
+ You can finetune this model on your own dataset.
+
+ <details><summary>Click to expand</summary>
+
+ </details>
+ -->
+
+ <!--
+ ### Out-of-Scope Use
+
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
+ -->
+
+ ## Evaluation
+
+ ### Metrics
+
+ #### Binary Classification
+
+ * Dataset: `sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2`
+ * Evaluated with [<code>BinaryClassificationEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.BinaryClassificationEvaluator)
+
+ | Metric                    | Value      |
+ |:--------------------------|:-----------|
+ | cosine_accuracy           | 0.9843     |
+ | cosine_accuracy_threshold | 0.7421     |
+ | cosine_f1                 | 0.9761     |
+ | cosine_f1_threshold       | 0.7421     |
+ | cosine_precision          | 0.9703     |
+ | cosine_recall             | 0.9819     |
+ | **cosine_ap**             | **0.9956** |
+ | cosine_mcc                | 0.9644     |
+
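+ These figures come from sweeping decision thresholds over labeled validation pairs. A minimal sketch of running the same evaluator on a toy pair set (the pairs are lifted from the dataset samples below; the reported numbers used the full evaluation split):
+
+ ```python
+ from sentence_transformers import SentenceTransformer
+ from sentence_transformers.evaluation import BinaryClassificationEvaluator
+
+ model = SentenceTransformer("sentence_transformers_model_id")
+
+ evaluator = BinaryClassificationEvaluator(
+     sentences1=["캐스린 설리번", "Пырванов, Георги"],
+     sentences2=["Kathryn D. Sullivanová", "アナトーリー・セルジュコフ"],
+     labels=[1, 0],  # 1 = same entity, 0 = different entities
+     name="toy-name-matching",
+ )
+ results = evaluator(model)  # dict of metrics: cosine_accuracy, cosine_f1, cosine_ap, ...
+ print(results)
+ ```
+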
+ <!--
+ ## Bias, Risks and Limitations
+
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
+ -->
+
+ <!--
+ ### Recommendations
+
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
+ -->
+
+ ## Training Details
+
+ ### Training Dataset
+
+ #### Unnamed Dataset
+
+ * Size: 2,130,621 training samples
+ * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
+ * Approximate statistics based on the first 1000 samples:
+   |         | sentence1                                                                         | sentence2                                                                         | label                                                          |
+   |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:-----------------------------------------------------------------|
+   | type    | string                                                                            | string                                                                            | float                                                          |
+   | details | <ul><li>min: 3 tokens</li><li>mean: 9.32 tokens</li><li>max: 57 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 9.16 tokens</li><li>max: 54 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.34</li><li>max: 1.0</li></ul> |
+ * Samples:
+   | sentence1                         | sentence2                           | label            |
+   |:----------------------------------|:------------------------------------|:-----------------|
+   | <code>캐스린 설리번</code>          | <code>Kathryn D. Sullivanová</code> | <code>1.0</code> |
+   | <code>ଶିବରାଜ ଅଧାଲରାଓ ପାଟିଲ</code>  | <code>Aleksander Lubocki</code>     | <code>0.0</code> |
+   | <code>Пырванов, Георги</code>     | <code>アナトーリー・セルジュコフ</code>   | <code>0.0</code> |
+ * Loss: [<code>ContrastiveLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#contrastiveloss) with these parameters:
+   ```json
+   {
+       "distance_metric": "SiameseDistanceMetric.COSINE_DISTANCE",
+       "margin": 0.5,
+       "size_average": true
+   }
+   ```
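+
+ With cosine distance `d = 1 - cos(u, v)`, label `y ∈ {0, 1}`, and margin `m = 0.5`, this loss is `y·d² + (1 − y)·max(0, m − d)²`: positive pairs are pulled together while negative pairs are pushed apart until they clear the margin, and `size_average: true` averages over the batch. A minimal numeric sketch of that formula (the 0.5 scale mirrors the library's convention but does not change the optimum), not the library implementation itself:
+
+ ```python
+ import torch
+ import torch.nn.functional as F
+
+ def contrastive_loss(u, v, y, margin=0.5):
+     """Contrastive loss on cosine distance (Hadsell et al., cited below)."""
+     d = 1.0 - F.cosine_similarity(u, v)                 # per-pair cosine distance
+     per_pair = y * d.pow(2) + (1 - y) * F.relu(margin - d).pow(2)
+     return 0.5 * per_pair.mean()                        # size_average=True
+
+ u, v = torch.randn(4, 384), torch.randn(4, 384)         # stand-in embedding pairs
+ y = torch.tensor([1.0, 0.0, 1.0, 0.0])                  # 1 = same entity
+ print(contrastive_loss(u, v, y))
+ ```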
+
+ ### Evaluation Dataset
+
+ #### Unnamed Dataset
+
+ * Size: 2,663,276 evaluation samples
+ * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
+ * Approximate statistics based on the first 1000 samples:
+   |         | sentence1                                                                          | sentence2                                                                          | label                                                          |
+   |:--------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|:-----------------------------------------------------------------|
+   | type    | string                                                                             | string                                                                             | float                                                          |
+   | details | <ul><li>min: 3 tokens</li><li>mean: 9.34 tokens</li><li>max: 102 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 9.11 tokens</li><li>max: 100 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.33</li><li>max: 1.0</li></ul> |
+ * Samples:
+   | sentence1                              | sentence2                               | label            |
+   |:---------------------------------------|:----------------------------------------|:-----------------|
+   | <code>Ева Херман</code>                | <code>I Xuan Karlos</code>              | <code>0.0</code> |
+   | <code>Кличков Андрій Євгенович</code>  | <code>Андрэй Яўгенавіч Клычкоў</code>   | <code>1.0</code> |
+   | <code>Кинах А.</code>                  | <code>Senator John Hickenlooper</code>  | <code>0.0</code> |
+ * Loss: [<code>ContrastiveLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#contrastiveloss) with these parameters:
+   ```json
+   {
+       "distance_metric": "SiameseDistanceMetric.COSINE_DISTANCE",
+       "margin": 0.5,
+       "size_average": true
+   }
+   ```
+
+ ### Training Hyperparameters
+ #### Non-Default Hyperparameters
+
+ - `eval_strategy`: steps
+ - `per_device_train_batch_size`: 1000
+ - `per_device_eval_batch_size`: 1000
+ - `gradient_accumulation_steps`: 4
+ - `learning_rate`: 3e-05
+ - `weight_decay`: 0.01
+ - `num_train_epochs`: 8
+ - `warmup_ratio`: 0.1
+ - `fp16_opt_level`: O0
+ - `load_best_model_at_end`: True
+ - `optim`: adafactor
+
+ #### All Hyperparameters
+ <details><summary>Click to expand</summary>
+
+ - `overwrite_output_dir`: False
+ - `do_predict`: False
+ - `eval_strategy`: steps
+ - `prediction_loss_only`: True
+ - `per_device_train_batch_size`: 1000
+ - `per_device_eval_batch_size`: 1000
+ - `per_gpu_train_batch_size`: None
+ - `per_gpu_eval_batch_size`: None
+ - `gradient_accumulation_steps`: 4
+ - `eval_accumulation_steps`: None
+ - `torch_empty_cache_steps`: None
+ - `learning_rate`: 3e-05
+ - `weight_decay`: 0.01
+ - `adam_beta1`: 0.9
+ - `adam_beta2`: 0.999
+ - `adam_epsilon`: 1e-08
+ - `max_grad_norm`: 1.0
+ - `num_train_epochs`: 8
+ - `max_steps`: -1
+ - `lr_scheduler_type`: linear
+ - `lr_scheduler_kwargs`: {}
+ - `warmup_ratio`: 0.1
+ - `warmup_steps`: 0
+ - `log_level`: passive
+ - `log_level_replica`: warning
+ - `log_on_each_node`: True
+ - `logging_nan_inf_filter`: True
+ - `save_safetensors`: True
+ - `save_on_each_node`: False
+ - `save_only_model`: False
+ - `restore_callback_states_from_checkpoint`: False
+ - `no_cuda`: False
+ - `use_cpu`: False
+ - `use_mps_device`: False
+ - `seed`: 42
+ - `data_seed`: None
+ - `jit_mode_eval`: False
+ - `use_ipex`: False
+ - `bf16`: False
+ - `fp16`: False
+ - `fp16_opt_level`: O0
+ - `half_precision_backend`: auto
+ - `bf16_full_eval`: False
+ - `fp16_full_eval`: False
+ - `tf32`: None
+ - `local_rank`: 0
+ - `ddp_backend`: None
+ - `tpu_num_cores`: None
+ - `tpu_metrics_debug`: False
+ - `debug`: []
+ - `dataloader_drop_last`: False
+ - `dataloader_num_workers`: 0
+ - `dataloader_prefetch_factor`: None
+ - `past_index`: -1
+ - `disable_tqdm`: False
+ - `remove_unused_columns`: True
+ - `label_names`: None
+ - `load_best_model_at_end`: True
+ - `ignore_data_skip`: False
+ - `fsdp`: []
+ - `fsdp_min_num_params`: 0
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
+ - `tp_size`: 0
+ - `fsdp_transformer_layer_cls_to_wrap`: None
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
+ - `deepspeed`: None
+ - `label_smoothing_factor`: 0.0
+ - `optim`: adafactor
+ - `optim_args`: None
+ - `adafactor`: False
+ - `group_by_length`: False
+ - `length_column_name`: length
+ - `ddp_find_unused_parameters`: None
+ - `ddp_bucket_cap_mb`: None
+ - `ddp_broadcast_buffers`: False
+ - `dataloader_pin_memory`: True
+ - `dataloader_persistent_workers`: False
+ - `skip_memory_metrics`: True
+ - `use_legacy_prediction_loop`: False
+ - `push_to_hub`: False
+ - `resume_from_checkpoint`: None
+ - `hub_model_id`: None
+ - `hub_strategy`: every_save
+ - `hub_private_repo`: None
+ - `hub_always_push`: False
+ - `gradient_checkpointing`: False
+ - `gradient_checkpointing_kwargs`: None
+ - `include_inputs_for_metrics`: False
+ - `include_for_metrics`: []
+ - `eval_do_concat_batches`: True
+ - `fp16_backend`: auto
+ - `push_to_hub_model_id`: None
+ - `push_to_hub_organization`: None
+ - `mp_parameters`:
+ - `auto_find_batch_size`: False
+ - `full_determinism`: False
+ - `torchdynamo`: None
+ - `ray_scope`: last
+ - `ddp_timeout`: 1800
+ - `torch_compile`: False
+ - `torch_compile_backend`: None
+ - `torch_compile_mode`: None
+ - `include_tokens_per_second`: False
+ - `include_num_input_tokens_seen`: False
+ - `neftune_noise_alpha`: None
+ - `optim_target_modules`: None
+ - `batch_eval_metrics`: False
+ - `eval_on_start`: False
+ - `use_liger_kernel`: False
+ - `eval_use_gather_object`: False
+ - `average_tokens_across_devices`: False
+ - `prompts`: None
+ - `batch_sampler`: batch_sampler
+ - `multi_dataset_batch_sampler`: proportional
+
+ </details>
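+
+ These arguments map directly onto `SentenceTransformerTrainingArguments`. A hedged sketch of reconstructing the run, not the exact training script; the dataset below is a toy stand-in and the output path is hypothetical:
+
+ ```python
+ from datasets import Dataset
+ from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer
+ from sentence_transformers.losses import ContrastiveLoss
+ from sentence_transformers.training_args import SentenceTransformerTrainingArguments
+
+ model = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
+ loss = ContrastiveLoss(model, margin=0.5, size_average=True)
+
+ # Toy stand-in for the real splits; columns match the dataset tables above.
+ train_dataset = Dataset.from_dict({
+     "sentence1": ["캐스린 설리번", "Пырванов, Георги"],
+     "sentence2": ["Kathryn D. Sullivanová", "アナトーリー・セルジュコフ"],
+     "label": [1.0, 0.0],
+ })
+
+ args = SentenceTransformerTrainingArguments(
+     output_dir="fine-tuned-name-matcher",  # hypothetical output path
+     per_device_train_batch_size=1000,
+     per_device_eval_batch_size=1000,
+     gradient_accumulation_steps=4,
+     learning_rate=3e-5,
+     weight_decay=0.01,
+     num_train_epochs=8,
+     warmup_ratio=0.1,
+     eval_strategy="steps",
+     load_best_model_at_end=True,
+     optim="adafactor",
+ )
+
+ trainer = SentenceTransformerTrainer(
+     model=model,
+     args=args,
+     train_dataset=train_dataset,
+     eval_dataset=train_dataset,  # illustration only; use a held-out split
+     loss=loss,
+ )
+ trainer.train()
+ ```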
+
+ ### Training Logs
+ | Epoch  | Step | Training Loss | Validation Loss | sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap |
+ |:------:|:----:|:-------------:|:---------------:|:---------------------------------------------------------------------:|
+ | -1     | -1   | -             | -               | 0.7140                                                                 |
+ | 0.1877 | 100  | -             | 0.0125          | 0.8849                                                                 |
+ | 0.3754 | 200  | -             | 0.0090          | 0.9369                                                                 |
+ | 0.5631 | 300  | -             | 0.0068          | 0.9630                                                                 |
+ | 0.7508 | 400  | -             | 0.0052          | 0.9774                                                                 |
+ | 0.9385 | 500  | 0.0409        | 0.0040          | 0.9845                                                                 |
+ | 1.1276 | 600  | -             | 0.0033          | 0.9887                                                                 |
+ | 1.3153 | 700  | -             | 0.0028          | 0.9911                                                                 |
+ | 1.5031 | 800  | -             | 0.0026          | 0.9927                                                                 |
+ | 1.6908 | 900  | -             | 0.0022          | 0.9938                                                                 |
+ | 1.8785 | 1000 | 0.0131        | 0.0022          | 0.9944                                                                 |
+ | 2.0676 | 1100 | -             | 0.0019          | 0.9950                                                                 |
+ | 2.2553 | 1200 | -             | 0.0017          | 0.9956                                                                 |
+
+
+ ### Framework Versions
+ - Python: 3.12.9
+ - Sentence Transformers: 3.4.1
+ - Transformers: 4.51.3
+ - PyTorch: 2.7.0+cu126
+ - Accelerate: 1.6.0
+ - Datasets: 3.6.0
+ - Tokenizers: 0.21.1
+
+ ## Citation
+
+ ### BibTeX
+
+ #### Sentence Transformers
+ ```bibtex
+ @inproceedings{reimers-2019-sentence-bert,
+     title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+     author = "Reimers, Nils and Gurevych, Iryna",
+     booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+     month = "11",
+     year = "2019",
+     publisher = "Association for Computational Linguistics",
+     url = "https://arxiv.org/abs/1908.10084",
+ }
+ ```
+
+ #### ContrastiveLoss
+ ```bibtex
+ @inproceedings{hadsell2006dimensionality,
+     author={Hadsell, R. and Chopra, S. and LeCun, Y.},
+     booktitle={2006 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR'06)},
+     title={Dimensionality Reduction by Learning an Invariant Mapping},
+     year={2006},
+     volume={2},
+     number={},
+     pages={1735-1742},
+     doi={10.1109/CVPR.2006.100}
+ }
+ ```
+
+ <!--
+ ## Glossary
+
+ *Clearly define terms in order to be accessible across audiences.*
+ -->
+
+ <!--
+ ## Model Card Authors
+
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+ -->
+
+ <!--
+ ## Model Card Contact
+
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+ -->
checkpoint-1200/config.json ADDED
@@ -0,0 +1,25 @@
+ {
+   "architectures": [
+     "BertModel"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 384,
+   "initializer_range": 0.02,
+   "intermediate_size": 1536,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.51.3",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 250037
+ }
checkpoint-1200/config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "__version__": {
+     "sentence_transformers": "3.4.1",
+     "transformers": "4.51.3",
+     "pytorch": "2.7.0+cu126"
+   },
+   "prompts": {},
+   "default_prompt_name": null,
+   "similarity_fn_name": "cosine"
+ }
checkpoint-1200/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2a16798609ad3be64f1c33cafbc6d8595006225a97722265fbba67e2dfaf916a
+ size 470637416
checkpoint-1200/modules.json ADDED
@@ -0,0 +1,14 @@
+ [
+   {
+     "idx": 0,
+     "name": "0",
+     "path": "",
+     "type": "sentence_transformers.models.Transformer"
+   },
+   {
+     "idx": 1,
+     "name": "1",
+     "path": "1_Pooling",
+     "type": "sentence_transformers.models.Pooling"
+   }
+ ]
checkpoint-1200/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6e6c0d2369b6fe2e14855dd6ee01f523f4a5901a968149aedc664f3defacc964
+ size 1715019
checkpoint-1200/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:51a5e12d95d9820ac91d074df8188d98ce5f4fc76cb3ec8a63d860d96a200697
+ size 14645
checkpoint-1200/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:efdbea4967733a900bbe36cb7fab0e417825ab1560e9b509550180a0f55ecc51
+ size 1465
checkpoint-1200/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+   "max_seq_length": 128,
+   "do_lower_case": false
+ }
checkpoint-1200/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "cls_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "mask_token": {
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "sep_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
checkpoint-1200/tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cad551d5600a84242d0973327029452a1e3672ba6313c2a3c3d69c4310e12719
+ size 17082987
checkpoint-1200/tokenizer_config.json ADDED
@@ -0,0 +1,65 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<pad>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "250001": {
+       "content": "<mask>",
+       "lstrip": true,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": false,
+   "cls_token": "<s>",
+   "do_lower_case": true,
+   "eos_token": "</s>",
+   "extra_special_tokens": {},
+   "mask_token": "<mask>",
+   "max_length": 128,
+   "model_max_length": 128,
+   "pad_to_multiple_of": null,
+   "pad_token": "<pad>",
+   "pad_token_type_id": 0,
+   "padding_side": "right",
+   "sep_token": "</s>",
+   "stride": 0,
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "truncation_side": "right",
+   "truncation_strategy": "longest_first",
+   "unk_token": "<unk>"
+ }
checkpoint-1200/trainer_state.json ADDED
@@ -0,0 +1,249 @@
+ {
+   "best_global_step": 1200,
+   "best_metric": 0.0017435119953006506,
+   "best_model_checkpoint": "data/fine-tuned-sbert-sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2-original-adafactor/checkpoint-1200",
+   "epoch": 2.255279211637729,
+   "eval_steps": 100,
+   "global_step": 1200,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.18770530267480057,
+       "eval_loss": 0.012530049309134483,
+       "eval_runtime": 812.6802,
+       "eval_samples_per_second": 3277.151,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.8778235859541618,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7128396034240723,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.8848748516159781,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.812583495899967,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.6880456209182739,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.7185793630359445,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.7900823930955021,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.8364038065429271,
+       "eval_steps_per_second": 3.278,
+       "step": 100
+     },
+     {
+       "epoch": 0.37541060534960113,
+       "eval_loss": 0.009013425558805466,
+       "eval_runtime": 792.9843,
+       "eval_samples_per_second": 3358.548,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9164113424048541,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7378441095352173,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9368603114664952,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.8729798695775446,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7272344827651978,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.8103205315460159,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.8605654745268148,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.8857576838544123,
+       "eval_steps_per_second": 3.359,
+       "step": 200
+     },
+     {
+       "epoch": 0.5631159080244017,
+       "eval_loss": 0.006819029338657856,
+       "eval_runtime": 809.9704,
+       "eval_samples_per_second": 3288.115,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9398298338890391,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7449667453765869,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9629957356284182,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9088032597499417,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7449667453765869,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.864029341509194,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.8990159430733201,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9188060251084542,
+       "eval_steps_per_second": 3.289,
+       "step": 300
+     },
+     {
+       "epoch": 0.7508212106992023,
+       "eval_loss": 0.005150709766894579,
+       "eval_runtime": 797.9199,
+       "eval_samples_per_second": 3337.773,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9560016220600163,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7553268671035767,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9774059659768239,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9333702119012406,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7449506521224976,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9005457325671423,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.916037892637527,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9513710688929036,
+       "eval_steps_per_second": 3.339,
+       "step": 400
+     },
+     {
+       "epoch": 0.9385265133740028,
+       "grad_norm": 0.17396493256092072,
+       "learning_rate": 2.9428198433420364e-05,
+       "loss": 0.0409,
+       "step": 500
+     },
+     {
+       "epoch": 0.9385265133740028,
+       "eval_loss": 0.003973629325628281,
+       "eval_runtime": 809.4532,
+       "eval_samples_per_second": 3290.216,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9655950557207654,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7622435092926025,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9845099503823473,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9477742208778024,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7535413503646851,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9221773981286795,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9367750202319935,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9590347859107281,
+       "eval_steps_per_second": 3.291,
+       "step": 500
+     },
+     {
+       "epoch": 1.1276396058188645,
+       "eval_loss": 0.0032712339889258146,
+       "eval_runtime": 793.7573,
+       "eval_samples_per_second": 3355.277,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9712722657775374,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7610360383987427,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9887055977101925,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9564087809158087,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7610177993774414,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9350876149915242,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9471753898932449,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9658239646502422,
+       "eval_steps_per_second": 3.356,
+       "step": 600
+     },
+     {
+       "epoch": 1.3153449084936648,
+       "eval_loss": 0.0028166945558041334,
+       "eval_runtime": 815.1943,
+       "eval_samples_per_second": 3267.044,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9751246583160614,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7577522993087769,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9911117019106511,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9621558129059113,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7424367666244507,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.943665667488554,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9536134909690983,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9708525597505264,
+       "eval_steps_per_second": 3.268,
+       "step": 700
+     },
+     {
+       "epoch": 1.5030502111684654,
+       "eval_loss": 0.0026242006570100784,
+       "eval_runtime": 805.7115,
+       "eval_samples_per_second": 3305.496,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9782673995974888,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7254683971405029,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9927214598054878,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9669240257663667,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7145971059799194,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9507846488068235,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9597660102710608,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9741896137072368,
+       "eval_steps_per_second": 3.306,
+       "step": 800
+     },
+     {
+       "epoch": 1.690755513843266,
+       "eval_loss": 0.002248650649562478,
+       "eval_runtime": 818.5338,
+       "eval_samples_per_second": 3253.715,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9801973506353069,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7349117994308472,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9938133122786723,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9698356230196407,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7348856329917908,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9551340483533577,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9641228578901284,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9756164919507957,
+       "eval_steps_per_second": 3.255,
+       "step": 900
+     },
+     {
+       "epoch": 1.8784608165180665,
+       "grad_norm": 0.07541557401418686,
+       "learning_rate": 2.5511749347258486e-05,
+       "loss": 0.0131,
+       "step": 1000
+     },
+     {
+       "epoch": 1.8784608165180665,
+       "eval_loss": 0.002240537665784359,
+       "eval_runtime": 803.6286,
+       "eval_samples_per_second": 3314.063,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9817931272716349,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7197962999343872,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9944127523785896,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9722373310278887,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7091608047485352,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9587183163648803,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9675121928984912,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9770088489465266,
+       "eval_steps_per_second": 3.315,
+       "step": 1000
+     },
+     {
+       "epoch": 2.0675739089629284,
+       "eval_loss": 0.0018734760815277696,
+       "eval_runtime": 807.0812,
+       "eval_samples_per_second": 3299.886,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9828594815415578,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7552986741065979,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9950493119597241,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.973889221813201,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7401974201202393,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9611601510291333,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9661201195760486,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9817842882294052,
+       "eval_steps_per_second": 3.301,
+       "step": 1100
+     },
+     {
+       "epoch": 2.255279211637729,
+       "eval_loss": 0.0017435119953006506,
+       "eval_runtime": 802.6162,
+       "eval_samples_per_second": 3318.244,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9843050674356433,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.742120623588562,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9955554741842152,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9760932477723254,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.742120623588562,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.964449493634366,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9703216856372878,
+       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9819338803033267,
+       "eval_steps_per_second": 3.319,
+       "step": 1200
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 4256,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 8,
+   "save_steps": 100,
+   "stateful_callbacks": {
+     "EarlyStoppingCallback": {
+       "args": {
+         "early_stopping_patience": 1,
+         "early_stopping_threshold": 0.0
+       },
+       "attributes": {
+         "early_stopping_patience_counter": 0
+       }
+     },
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": false
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 0.0,
+   "train_batch_size": 1000,
+   "trial_name": null,
+   "trial_params": null
+ }
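
The trainer state above is how `load_best_model_at_end` picks a checkpoint: `best_metric` is the lowest eval loss so far, `best_model_checkpoint` points at the step that produced it, and `EarlyStoppingCallback` (patience 1) halts training once eval loss fails to improve for one evaluation. A small sketch for inspecting such a file after a run; the path is an assumption:

```python
import json
from pathlib import Path

state = json.loads(Path("checkpoint-1200/trainer_state.json").read_text())
print(state["best_model_checkpoint"], state["best_metric"])

# Trace eval loss across logged steps to see the improvement trail.
for entry in state["log_history"]:
    if "eval_loss" in entry:
        print(entry["step"], entry["eval_loss"])
```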
checkpoint-1200/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9339753774865faea550d7da93688221ca0f43171c16e3034645a2149992c8a6
+ size 6033
checkpoint-1200/unigram.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:da145b5e7700ae40f16691ec32a0b1fdc1ee3298db22a31ea55f57a966c4a65d
+ size 14763260
checkpoint-1300/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "word_embedding_dimension": 384,
+   "pooling_mode_cls_token": false,
+   "pooling_mode_mean_tokens": true,
+   "pooling_mode_max_tokens": false,
+   "pooling_mode_mean_sqrt_len_tokens": false,
+   "pooling_mode_weightedmean_tokens": false,
+   "pooling_mode_lasttoken": false,
+   "include_prompt": true
+ }