rjurney committed on
Commit 2187172 · unverified · 1 parent: ccc1d6b

Removing old model values

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. 1_Pooling/config.json +0 -10
  2. checkpoint-1000/1_Pooling/config.json +0 -10
  3. checkpoint-1000/README.md +0 -466
  4. checkpoint-1000/config.json +0 -25
  5. checkpoint-1000/config_sentence_transformers.json +0 -10
  6. checkpoint-1000/model.safetensors +0 -3
  7. checkpoint-1000/modules.json +0 -14
  8. checkpoint-1000/optimizer.pt +0 -3
  9. checkpoint-1000/rng_state.pth +0 -3
  10. checkpoint-1000/scheduler.pt +0 -3
  11. checkpoint-1000/sentence_bert_config.json +0 -4
  12. checkpoint-1000/special_tokens_map.json +0 -51
  13. checkpoint-1000/tokenizer.json +0 -3
  14. checkpoint-1000/tokenizer_config.json +0 -65
  15. checkpoint-1000/trainer_state.json +0 -217
  16. checkpoint-1000/training_args.bin +0 -3
  17. checkpoint-1000/unigram.json +0 -3
  18. checkpoint-1100/1_Pooling/config.json +0 -10
  19. checkpoint-1100/README.md +0 -467
  20. checkpoint-1100/config.json +0 -25
  21. checkpoint-1100/config_sentence_transformers.json +0 -10
  22. checkpoint-1100/model.safetensors +0 -3
  23. checkpoint-1100/modules.json +0 -14
  24. checkpoint-1100/optimizer.pt +0 -3
  25. checkpoint-1100/rng_state.pth +0 -3
  26. checkpoint-1100/scheduler.pt +0 -3
  27. checkpoint-1100/sentence_bert_config.json +0 -4
  28. checkpoint-1100/special_tokens_map.json +0 -51
  29. checkpoint-1100/tokenizer.json +0 -3
  30. checkpoint-1100/tokenizer_config.json +0 -65
  31. checkpoint-1100/trainer_state.json +0 -233
  32. checkpoint-1100/training_args.bin +0 -3
  33. checkpoint-1100/unigram.json +0 -3
  34. checkpoint-1200/1_Pooling/config.json +0 -10
  35. checkpoint-1200/README.md +0 -468
  36. checkpoint-1200/config.json +0 -25
  37. checkpoint-1200/config_sentence_transformers.json +0 -10
  38. checkpoint-1200/model.safetensors +0 -3
  39. checkpoint-1200/modules.json +0 -14
  40. checkpoint-1200/optimizer.pt +0 -3
  41. checkpoint-1200/rng_state.pth +0 -3
  42. checkpoint-1200/scheduler.pt +0 -3
  43. checkpoint-1200/sentence_bert_config.json +0 -4
  44. checkpoint-1200/special_tokens_map.json +0 -51
  45. checkpoint-1200/tokenizer.json +0 -3
  46. checkpoint-1200/tokenizer_config.json +0 -65
  47. checkpoint-1200/trainer_state.json +0 -249
  48. checkpoint-1200/training_args.bin +0 -3
  49. checkpoint-1200/unigram.json +0 -3
  50. checkpoint-1300/1_Pooling/config.json +0 -10
1_Pooling/config.json DELETED
@@ -1,10 +0,0 @@
- {
- "word_embedding_dimension": 384,
- "pooling_mode_cls_token": false,
- "pooling_mode_mean_tokens": true,
- "pooling_mode_max_tokens": false,
- "pooling_mode_mean_sqrt_len_tokens": false,
- "pooling_mode_weightedmean_tokens": false,
- "pooling_mode_lasttoken": false,
- "include_prompt": true
- }

checkpoint-1000/1_Pooling/config.json DELETED
@@ -1,10 +0,0 @@
- {
- "word_embedding_dimension": 384,
- "pooling_mode_cls_token": false,
- "pooling_mode_mean_tokens": true,
- "pooling_mode_max_tokens": false,
- "pooling_mode_mean_sqrt_len_tokens": false,
- "pooling_mode_weightedmean_tokens": false,
- "pooling_mode_lasttoken": false,
- "include_prompt": true
- }

checkpoint-1000/README.md DELETED
@@ -1,466 +0,0 @@
- ---
- language:
- - en
- license: apache-2.0
- tags:
- - sentence-transformers
- - sentence-similarity
- - feature-extraction
- - generated_from_trainer
- - dataset_size:2130621
- - loss:ContrastiveLoss
- base_model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
- widget:
- - source_sentence: Kim Chol-sam
- sentences:
- - Stankevich Sergey Nikolayevich
- - Kim Chin-So’k
- - Julen Lopetegui Agote
- - source_sentence: دينا بنت عبد الحميد
- sentences:
- - Alexia van Amsberg
- - Anthony Nicholas Colin Maitland Biddulph, 5th Baron Biddulph
- - Dina bint Abdul-Hamíd
- - source_sentence: Մուհամեդ բեն Նաիֆ Ալ Սաուդ
- sentences:
- - Karpov Anatoly Evgenyevich
- - GNPower Mariveles Coal Plant [former]
- - Muhammed bin Nayef bin Abdul Aziz Al Saud
- - source_sentence: Edward Gnehm
- sentences:
- - Шауэрте, Хартмут
- - Ханзада Филипп, Эдинбург герцогі
- - AFX
- - source_sentence: Schori i Lidingö
- sentences:
- - Yordan Canev
- - ကားပေါ့ အန်နာတိုလီ
- - BYSTROV, Mikhail Ivanovich
- pipeline_tag: sentence-similarity
- library_name: sentence-transformers
- metrics:
- - cosine_accuracy
- - cosine_accuracy_threshold
- - cosine_f1
- - cosine_f1_threshold
- - cosine_precision
- - cosine_recall
- - cosine_ap
- - cosine_mcc
- model-index:
- - name: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2-name-matcher-original
- results:
- - task:
- type: binary-classification
- name: Binary Classification
- dataset:
- name: sentence transformers paraphrase multilingual MiniLM L12 v2
- type: sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2
- metrics:
- - type: cosine_accuracy
- value: 0.9817931272716349
- name: Cosine Accuracy
- - type: cosine_accuracy_threshold
- value: 0.7197962999343872
- name: Cosine Accuracy Threshold
- - type: cosine_f1
- value: 0.9722373310278887
- name: Cosine F1
- - type: cosine_f1_threshold
- value: 0.7091608047485352
- name: Cosine F1 Threshold
- - type: cosine_precision
- value: 0.9675121928984912
- name: Cosine Precision
- - type: cosine_recall
- value: 0.9770088489465266
- name: Cosine Recall
- - type: cosine_ap
- value: 0.9944127523785896
- name: Cosine Ap
- - type: cosine_mcc
- value: 0.9587183163648803
- name: Cosine Mcc
- ---
-
- # sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2-name-matcher-original
-
- This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
-
- ## Model Details
-
- ### Model Description
- - **Model Type:** Sentence Transformer
- - **Base model:** [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) <!-- at revision 86741b4e3f5cb7765a600d3a3d55a0f6a6cb443d -->
- - **Maximum Sequence Length:** 128 tokens
- - **Output Dimensionality:** 384 dimensions
- - **Similarity Function:** Cosine Similarity
- <!-- - **Training Dataset:** Unknown -->
- - **Language:** en
- - **License:** apache-2.0
-
- ### Model Sources
-
- - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
-
- ### Full Model Architecture
-
- ```
- SentenceTransformer(
- (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel
- (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
- )
- ```
-
- ## Usage
-
- ### Direct Usage (Sentence Transformers)
-
- First install the Sentence Transformers library:
-
- ```bash
- pip install -U sentence-transformers
- ```
-
- Then you can load this model and run inference.
- ```python
- from sentence_transformers import SentenceTransformer
-
- # Download from the 🤗 Hub
- model = SentenceTransformer("sentence_transformers_model_id")
- # Run inference
- sentences = [
- 'Schori i Lidingö',
- 'Yordan Canev',
- 'ကားပေါ့ အန်နာတိုလီ',
- ]
- embeddings = model.encode(sentences)
- print(embeddings.shape)
- # [3, 384]
-
- # Get the similarity scores for the embeddings
- similarities = model.similarity(embeddings, embeddings)
- print(similarities.shape)
- # [3, 3]
- ```
-
- <!--
- ### Direct Usage (Transformers)
-
- <details><summary>Click to see the direct usage in Transformers</summary>
-
- </details>
- -->
-
- <!--
- ### Downstream Usage (Sentence Transformers)
-
- You can finetune this model on your own dataset.
-
- <details><summary>Click to expand</summary>
-
- </details>
- -->
-
- <!--
- ### Out-of-Scope Use
-
- *List how the model may foreseeably be misused and address what users ought not to do with the model.*
- -->
-
- ## Evaluation
-
- ### Metrics
-
- #### Binary Classification
-
- * Dataset: `sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2`
- * Evaluated with [<code>BinaryClassificationEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.BinaryClassificationEvaluator)
-
- | Metric | Value |
- |:--------------------------|:-----------|
- | cosine_accuracy | 0.9818 |
- | cosine_accuracy_threshold | 0.7198 |
- | cosine_f1 | 0.9722 |
- | cosine_f1_threshold | 0.7092 |
- | cosine_precision | 0.9675 |
- | cosine_recall | 0.977 |
- | **cosine_ap** | **0.9944** |
- | cosine_mcc | 0.9587 |
-
- <!--
- ## Bias, Risks and Limitations
-
- *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
- -->
-
- <!--
- ### Recommendations
-
- *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
- -->
-
- ## Training Details
-
- ### Training Dataset
-
- #### Unnamed Dataset
-
- * Size: 2,130,621 training samples
- * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
- * Approximate statistics based on the first 1000 samples:
- | | sentence1 | sentence2 | label |
- |:--------|:---------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|:---------------------------------------------------------------|
- | type | string | string | float |
- | details | <ul><li>min: 3 tokens</li><li>mean: 9.32 tokens</li><li>max: 57 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 9.16 tokens</li><li>max: 54 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.34</li><li>max: 1.0</li></ul> |
- * Samples:
- | sentence1 | sentence2 | label |
- |:----------------------------------|:------------------------------------|:-----------------|
- | <code>캐스린 설리번</code> | <code>Kathryn D. Sullivanová</code> | <code>1.0</code> |
- | <code>ଶିବରାଜ ଅଧାଲରାଓ ପାଟିଲ</code> | <code>Aleksander Lubocki</code> | <code>0.0</code> |
- | <code>Пырванов, Георги</code> | <code>アナトーリー・セルジュコフ</code> | <code>0.0</code> |
- * Loss: [<code>ContrastiveLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#contrastiveloss) with these parameters:
- ```json
- {
- "distance_metric": "SiameseDistanceMetric.COSINE_DISTANCE",
- "margin": 0.5,
- "size_average": true
- }
- ```
-
- ### Evaluation Dataset
-
- #### Unnamed Dataset
-
- * Size: 2,663,276 evaluation samples
- * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
- * Approximate statistics based on the first 1000 samples:
- | | sentence1 | sentence2 | label |
- |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:---------------------------------------------------------------|
- | type | string | string | float |
- | details | <ul><li>min: 3 tokens</li><li>mean: 9.34 tokens</li><li>max: 102 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 9.11 tokens</li><li>max: 100 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.33</li><li>max: 1.0</li></ul> |
- * Samples:
- | sentence1 | sentence2 | label |
- |:--------------------------------------|:---------------------------------------|:-----------------|
- | <code>Ева Херман</code> | <code>I Xuan Karlos</code> | <code>0.0</code> |
- | <code>Кличков Андрій Євгенович</code> | <code>Андрэй Яўгенавіч Клычкоў</code> | <code>1.0</code> |
- | <code>Кинах А.</code> | <code>Senator John Hickenlooper</code> | <code>0.0</code> |
- * Loss: [<code>ContrastiveLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#contrastiveloss) with these parameters:
- ```json
- {
- "distance_metric": "SiameseDistanceMetric.COSINE_DISTANCE",
- "margin": 0.5,
- "size_average": true
- }
- ```
-
- ### Training Hyperparameters
- #### Non-Default Hyperparameters
-
- - `eval_strategy`: steps
- - `per_device_train_batch_size`: 1000
- - `per_device_eval_batch_size`: 1000
- - `gradient_accumulation_steps`: 4
- - `learning_rate`: 3e-05
- - `weight_decay`: 0.01
- - `num_train_epochs`: 8
- - `warmup_ratio`: 0.1
- - `fp16_opt_level`: O0
- - `load_best_model_at_end`: True
- - `optim`: adafactor
-
- #### All Hyperparameters
- <details><summary>Click to expand</summary>
-
- - `overwrite_output_dir`: False
- - `do_predict`: False
- - `eval_strategy`: steps
- - `prediction_loss_only`: True
- - `per_device_train_batch_size`: 1000
- - `per_device_eval_batch_size`: 1000
- - `per_gpu_train_batch_size`: None
- - `per_gpu_eval_batch_size`: None
- - `gradient_accumulation_steps`: 4
- - `eval_accumulation_steps`: None
- - `torch_empty_cache_steps`: None
- - `learning_rate`: 3e-05
- - `weight_decay`: 0.01
- - `adam_beta1`: 0.9
- - `adam_beta2`: 0.999
- - `adam_epsilon`: 1e-08
- - `max_grad_norm`: 1.0
- - `num_train_epochs`: 8
- - `max_steps`: -1
- - `lr_scheduler_type`: linear
- - `lr_scheduler_kwargs`: {}
- - `warmup_ratio`: 0.1
- - `warmup_steps`: 0
- - `log_level`: passive
- - `log_level_replica`: warning
- - `log_on_each_node`: True
- - `logging_nan_inf_filter`: True
- - `save_safetensors`: True
- - `save_on_each_node`: False
- - `save_only_model`: False
- - `restore_callback_states_from_checkpoint`: False
- - `no_cuda`: False
- - `use_cpu`: False
- - `use_mps_device`: False
- - `seed`: 42
- - `data_seed`: None
- - `jit_mode_eval`: False
- - `use_ipex`: False
- - `bf16`: False
- - `fp16`: False
- - `fp16_opt_level`: O0
- - `half_precision_backend`: auto
- - `bf16_full_eval`: False
- - `fp16_full_eval`: False
- - `tf32`: None
- - `local_rank`: 0
- - `ddp_backend`: None
- - `tpu_num_cores`: None
- - `tpu_metrics_debug`: False
- - `debug`: []
- - `dataloader_drop_last`: False
- - `dataloader_num_workers`: 0
- - `dataloader_prefetch_factor`: None
- - `past_index`: -1
- - `disable_tqdm`: False
- - `remove_unused_columns`: True
- - `label_names`: None
- - `load_best_model_at_end`: True
- - `ignore_data_skip`: False
- - `fsdp`: []
- - `fsdp_min_num_params`: 0
- - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- - `tp_size`: 0
- - `fsdp_transformer_layer_cls_to_wrap`: None
- - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- - `deepspeed`: None
- - `label_smoothing_factor`: 0.0
- - `optim`: adafactor
- - `optim_args`: None
- - `adafactor`: False
- - `group_by_length`: False
- - `length_column_name`: length
- - `ddp_find_unused_parameters`: None
- - `ddp_bucket_cap_mb`: None
- - `ddp_broadcast_buffers`: False
- - `dataloader_pin_memory`: True
- - `dataloader_persistent_workers`: False
- - `skip_memory_metrics`: True
- - `use_legacy_prediction_loop`: False
- - `push_to_hub`: False
- - `resume_from_checkpoint`: None
- - `hub_model_id`: None
- - `hub_strategy`: every_save
- - `hub_private_repo`: None
- - `hub_always_push`: False
- - `gradient_checkpointing`: False
- - `gradient_checkpointing_kwargs`: None
- - `include_inputs_for_metrics`: False
- - `include_for_metrics`: []
- - `eval_do_concat_batches`: True
- - `fp16_backend`: auto
- - `push_to_hub_model_id`: None
- - `push_to_hub_organization`: None
- - `mp_parameters`:
- - `auto_find_batch_size`: False
- - `full_determinism`: False
- - `torchdynamo`: None
- - `ray_scope`: last
- - `ddp_timeout`: 1800
- - `torch_compile`: False
- - `torch_compile_backend`: None
- - `torch_compile_mode`: None
- - `include_tokens_per_second`: False
- - `include_num_input_tokens_seen`: False
- - `neftune_noise_alpha`: None
- - `optim_target_modules`: None
- - `batch_eval_metrics`: False
- - `eval_on_start`: False
- - `use_liger_kernel`: False
- - `eval_use_gather_object`: False
- - `average_tokens_across_devices`: False
- - `prompts`: None
- - `batch_sampler`: batch_sampler
- - `multi_dataset_batch_sampler`: proportional
-
- </details>
-
- ### Training Logs
- | Epoch | Step | Training Loss | Validation Loss | sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap |
- |:------:|:----:|:-------------:|:---------------:|:---------------------------------------------------------------------:|
- | -1 | -1 | - | - | 0.7140 |
- | 0.1877 | 100 | - | 0.0125 | 0.8849 |
- | 0.3754 | 200 | - | 0.0090 | 0.9369 |
- | 0.5631 | 300 | - | 0.0068 | 0.9630 |
- | 0.7508 | 400 | - | 0.0052 | 0.9774 |
- | 0.9385 | 500 | 0.0409 | 0.0040 | 0.9845 |
- | 1.1276 | 600 | - | 0.0033 | 0.9887 |
- | 1.3153 | 700 | - | 0.0028 | 0.9911 |
- | 1.5031 | 800 | - | 0.0026 | 0.9927 |
- | 1.6908 | 900 | - | 0.0022 | 0.9938 |
- | 1.8785 | 1000 | 0.0131 | 0.0022 | 0.9944 |
-
-
- ### Framework Versions
- - Python: 3.12.9
- - Sentence Transformers: 3.4.1
- - Transformers: 4.51.3
- - PyTorch: 2.7.0+cu126
- - Accelerate: 1.6.0
- - Datasets: 3.6.0
- - Tokenizers: 0.21.1
-
- ## Citation
-
- ### BibTeX
-
- #### Sentence Transformers
- ```bibtex
- @inproceedings{reimers-2019-sentence-bert,
- title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
- author = "Reimers, Nils and Gurevych, Iryna",
- booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
- month = "11",
- year = "2019",
- publisher = "Association for Computational Linguistics",
- url = "https://arxiv.org/abs/1908.10084",
- }
- ```
-
- #### ContrastiveLoss
- ```bibtex
- @inproceedings{hadsell2006dimensionality,
- author={Hadsell, R. and Chopra, S. and LeCun, Y.},
- booktitle={2006 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR'06)},
- title={Dimensionality Reduction by Learning an Invariant Mapping},
- year={2006},
- volume={2},
- number={},
- pages={1735-1742},
- doi={10.1109/CVPR.2006.100}
- }
- ```
-
- <!--
- ## Glossary
-
- *Clearly define terms in order to be accessible across audiences.*
- -->
-
- <!--
- ## Model Card Authors
-
- *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
- -->
-
- <!--
- ## Model Card Contact
-
- *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
- -->

checkpoint-1000/config.json DELETED
@@ -1,25 +0,0 @@
- {
- "architectures": [
- "BertModel"
- ],
- "attention_probs_dropout_prob": 0.1,
- "classifier_dropout": null,
- "gradient_checkpointing": false,
- "hidden_act": "gelu",
- "hidden_dropout_prob": 0.1,
- "hidden_size": 384,
- "initializer_range": 0.02,
- "intermediate_size": 1536,
- "layer_norm_eps": 1e-12,
- "max_position_embeddings": 512,
- "model_type": "bert",
- "num_attention_heads": 12,
- "num_hidden_layers": 12,
- "pad_token_id": 0,
- "position_embedding_type": "absolute",
- "torch_dtype": "float32",
- "transformers_version": "4.51.3",
- "type_vocab_size": 2,
- "use_cache": true,
- "vocab_size": 250037
- }

checkpoint-1000/config_sentence_transformers.json DELETED
@@ -1,10 +0,0 @@
- {
- "__version__": {
- "sentence_transformers": "3.4.1",
- "transformers": "4.51.3",
- "pytorch": "2.7.0+cu126"
- },
- "prompts": {},
- "default_prompt_name": null,
- "similarity_fn_name": "cosine"
- }

checkpoint-1000/model.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:53cdf706594f9c2e35f539f5023cae863f9d5c0e8588348281d86e7ac79b4662
- size 470637416

checkpoint-1000/modules.json DELETED
@@ -1,14 +0,0 @@
- [
- {
- "idx": 0,
- "name": "0",
- "path": "",
- "type": "sentence_transformers.models.Transformer"
- },
- {
- "idx": 1,
- "name": "1",
- "path": "1_Pooling",
- "type": "sentence_transformers.models.Pooling"
- }
- ]

checkpoint-1000/optimizer.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:c108fa814d36d19a8e9c702a9800909a0bdbcc7bbc32071418d14ec158efbaf5
- size 1715019

checkpoint-1000/rng_state.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:31530f34a96cd557f736a4c9e2dbdab66da89f3ee40e3c858c87c688d4a1b9a1
- size 14645

checkpoint-1000/scheduler.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:e8985e0cd69062d78f38bea6c82894c697cf2eff7e9a24bf93fa0da194c1b5e7
- size 1465

checkpoint-1000/sentence_bert_config.json DELETED
@@ -1,4 +0,0 @@
- {
- "max_seq_length": 128,
- "do_lower_case": false
- }

checkpoint-1000/special_tokens_map.json DELETED
@@ -1,51 +0,0 @@
- {
- "bos_token": {
- "content": "<s>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
- "cls_token": {
- "content": "<s>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
- "eos_token": {
- "content": "</s>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
- "mask_token": {
- "content": "<mask>",
- "lstrip": true,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
- "pad_token": {
- "content": "<pad>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
- "sep_token": {
- "content": "</s>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
- "unk_token": {
- "content": "<unk>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- }
- }

checkpoint-1000/tokenizer.json DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:cad551d5600a84242d0973327029452a1e3672ba6313c2a3c3d69c4310e12719
- size 17082987

checkpoint-1000/tokenizer_config.json DELETED
@@ -1,65 +0,0 @@
- {
- "added_tokens_decoder": {
- "0": {
- "content": "<s>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "1": {
- "content": "<pad>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "2": {
- "content": "</s>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "3": {
- "content": "<unk>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "250001": {
- "content": "<mask>",
- "lstrip": true,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- }
- },
- "bos_token": "<s>",
- "clean_up_tokenization_spaces": false,
- "cls_token": "<s>",
- "do_lower_case": true,
- "eos_token": "</s>",
- "extra_special_tokens": {},
- "mask_token": "<mask>",
- "max_length": 128,
- "model_max_length": 128,
- "pad_to_multiple_of": null,
- "pad_token": "<pad>",
- "pad_token_type_id": 0,
- "padding_side": "right",
- "sep_token": "</s>",
- "stride": 0,
- "strip_accents": null,
- "tokenize_chinese_chars": true,
- "tokenizer_class": "BertTokenizer",
- "truncation_side": "right",
- "truncation_strategy": "longest_first",
- "unk_token": "<unk>"
- }

checkpoint-1000/trainer_state.json DELETED
@@ -1,217 +0,0 @@
- {
- "best_global_step": 1000,
- "best_metric": 0.002240537665784359,
- "best_model_checkpoint": "data/fine-tuned-sbert-sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2-original-adafactor/checkpoint-1000",
- "epoch": 1.8784608165180665,
- "eval_steps": 100,
- "global_step": 1000,
- "is_hyper_param_search": false,
- "is_local_process_zero": true,
- "is_world_process_zero": true,
- "log_history": [
- {
- "epoch": 0.18770530267480057,
- "eval_loss": 0.012530049309134483,
- "eval_runtime": 812.6802,
- "eval_samples_per_second": 3277.151,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.8778235859541618,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7128396034240723,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.8848748516159781,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.812583495899967,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.6880456209182739,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.7185793630359445,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.7900823930955021,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.8364038065429271,
- "eval_steps_per_second": 3.278,
- "step": 100
- },
- {
- "epoch": 0.37541060534960113,
- "eval_loss": 0.009013425558805466,
- "eval_runtime": 792.9843,
- "eval_samples_per_second": 3358.548,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9164113424048541,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7378441095352173,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9368603114664952,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.8729798695775446,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7272344827651978,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.8103205315460159,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.8605654745268148,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.8857576838544123,
- "eval_steps_per_second": 3.359,
- "step": 200
- },
- {
- "epoch": 0.5631159080244017,
- "eval_loss": 0.006819029338657856,
- "eval_runtime": 809.9704,
- "eval_samples_per_second": 3288.115,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9398298338890391,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7449667453765869,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9629957356284182,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9088032597499417,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7449667453765869,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.864029341509194,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.8990159430733201,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9188060251084542,
- "eval_steps_per_second": 3.289,
- "step": 300
- },
- {
- "epoch": 0.7508212106992023,
- "eval_loss": 0.005150709766894579,
- "eval_runtime": 797.9199,
- "eval_samples_per_second": 3337.773,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9560016220600163,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7553268671035767,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9774059659768239,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9333702119012406,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7449506521224976,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9005457325671423,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.916037892637527,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9513710688929036,
- "eval_steps_per_second": 3.339,
- "step": 400
- },
- {
- "epoch": 0.9385265133740028,
- "grad_norm": 0.17396493256092072,
- "learning_rate": 2.9428198433420364e-05,
- "loss": 0.0409,
- "step": 500
- },
- {
- "epoch": 0.9385265133740028,
- "eval_loss": 0.003973629325628281,
- "eval_runtime": 809.4532,
- "eval_samples_per_second": 3290.216,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9655950557207654,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7622435092926025,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9845099503823473,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9477742208778024,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7535413503646851,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9221773981286795,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9367750202319935,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9590347859107281,
- "eval_steps_per_second": 3.291,
- "step": 500
- },
- {
- "epoch": 1.1276396058188645,
- "eval_loss": 0.0032712339889258146,
- "eval_runtime": 793.7573,
- "eval_samples_per_second": 3355.277,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9712722657775374,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7610360383987427,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9887055977101925,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9564087809158087,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7610177993774414,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9350876149915242,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9471753898932449,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9658239646502422,
- "eval_steps_per_second": 3.356,
- "step": 600
- },
- {
- "epoch": 1.3153449084936648,
- "eval_loss": 0.0028166945558041334,
- "eval_runtime": 815.1943,
- "eval_samples_per_second": 3267.044,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9751246583160614,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7577522993087769,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9911117019106511,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9621558129059113,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7424367666244507,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.943665667488554,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9536134909690983,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9708525597505264,
- "eval_steps_per_second": 3.268,
- "step": 700
- },
- {
- "epoch": 1.5030502111684654,
- "eval_loss": 0.0026242006570100784,
- "eval_runtime": 805.7115,
- "eval_samples_per_second": 3305.496,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9782673995974888,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7254683971405029,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9927214598054878,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9669240257663667,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7145971059799194,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9507846488068235,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9597660102710608,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9741896137072368,
- "eval_steps_per_second": 3.306,
- "step": 800
- },
- {
- "epoch": 1.690755513843266,
- "eval_loss": 0.002248650649562478,
- "eval_runtime": 818.5338,
- "eval_samples_per_second": 3253.715,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9801973506353069,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7349117994308472,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9938133122786723,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9698356230196407,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7348856329917908,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9551340483533577,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9641228578901284,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9756164919507957,
- "eval_steps_per_second": 3.255,
- "step": 900
- },
- {
- "epoch": 1.8784608165180665,
- "grad_norm": 0.07541557401418686,
- "learning_rate": 2.5511749347258486e-05,
- "loss": 0.0131,
- "step": 1000
- },
- {
- "epoch": 1.8784608165180665,
- "eval_loss": 0.002240537665784359,
- "eval_runtime": 803.6286,
- "eval_samples_per_second": 3314.063,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9817931272716349,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7197962999343872,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9944127523785896,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9722373310278887,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7091608047485352,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9587183163648803,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9675121928984912,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9770088489465266,
- "eval_steps_per_second": 3.315,
- "step": 1000
- }
- ],
- "logging_steps": 500,
- "max_steps": 4256,
- "num_input_tokens_seen": 0,
- "num_train_epochs": 8,
- "save_steps": 100,
- "stateful_callbacks": {
- "EarlyStoppingCallback": {
- "args": {
- "early_stopping_patience": 1,
- "early_stopping_threshold": 0.0
- },
- "attributes": {
- "early_stopping_patience_counter": 0
- }
- },
- "TrainerControl": {
- "args": {
- "should_epoch_stop": false,
- "should_evaluate": false,
- "should_log": false,
- "should_save": true,
- "should_training_stop": false
- },
- "attributes": {}
- }
- },
- "total_flos": 0.0,
- "train_batch_size": 1000,
- "trial_name": null,
- "trial_params": null
- }

checkpoint-1000/training_args.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:9339753774865faea550d7da93688221ca0f43171c16e3034645a2149992c8a6
- size 6033

checkpoint-1000/unigram.json DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:da145b5e7700ae40f16691ec32a0b1fdc1ee3298db22a31ea55f57a966c4a65d
- size 14763260

checkpoint-1100/1_Pooling/config.json DELETED
@@ -1,10 +0,0 @@
- {
- "word_embedding_dimension": 384,
- "pooling_mode_cls_token": false,
- "pooling_mode_mean_tokens": true,
- "pooling_mode_max_tokens": false,
- "pooling_mode_mean_sqrt_len_tokens": false,
- "pooling_mode_weightedmean_tokens": false,
- "pooling_mode_lasttoken": false,
- "include_prompt": true
- }

checkpoint-1100/README.md DELETED
@@ -1,467 +0,0 @@
1
- ---
2
- language:
3
- - en
4
- license: apache-2.0
5
- tags:
6
- - sentence-transformers
7
- - sentence-similarity
8
- - feature-extraction
9
- - generated_from_trainer
10
- - dataset_size:2130621
11
- - loss:ContrastiveLoss
12
- base_model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
13
- widget:
14
- - source_sentence: Kim Chol-sam
15
- sentences:
16
- - Stankevich Sergey Nikolayevich
17
- - Kim Chin-So’k
18
- - Julen Lopetegui Agote
19
- - source_sentence: دينا بنت عبد الحميد
20
- sentences:
21
- - Alexia van Amsberg
22
- - Anthony Nicholas Colin Maitland Biddulph, 5th Baron Biddulph
23
- - Dina bint Abdul-Hamíd
24
- - source_sentence: Մուհամեդ բեն Նաիֆ Ալ Սաուդ
25
- sentences:
26
- - Karpov Anatoly Evgenyevich
27
- - GNPower Mariveles Coal Plant [former]
28
- - Muhammed bin Nayef bin Abdul Aziz Al Saud
29
- - source_sentence: Edward Gnehm
30
- sentences:
31
- - Шауэрте, Хартмут
32
- - Ханзада Филипп, Эдинбург герцогі
33
- - AFX
34
- - source_sentence: Schori i Lidingö
35
- sentences:
36
- - Yordan Canev
37
- - ကားပေါ့ အန်နာတိုလီ
38
- - BYSTROV, Mikhail Ivanovich
39
- pipeline_tag: sentence-similarity
40
- library_name: sentence-transformers
41
- metrics:
42
- - cosine_accuracy
43
- - cosine_accuracy_threshold
44
- - cosine_f1
45
- - cosine_f1_threshold
46
- - cosine_precision
47
- - cosine_recall
48
- - cosine_ap
49
- - cosine_mcc
50
- model-index:
51
- - name: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2-name-matcher-original
52
- results:
53
- - task:
54
- type: binary-classification
55
- name: Binary Classification
56
- dataset:
57
- name: sentence transformers paraphrase multilingual MiniLM L12 v2
58
- type: sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2
59
- metrics:
60
- - type: cosine_accuracy
61
- value: 0.9828594815415578
62
- name: Cosine Accuracy
63
- - type: cosine_accuracy_threshold
64
- value: 0.7552986741065979
65
- name: Cosine Accuracy Threshold
66
- - type: cosine_f1
67
- value: 0.973889221813201
68
- name: Cosine F1
69
- - type: cosine_f1_threshold
70
- value: 0.7401974201202393
71
- name: Cosine F1 Threshold
72
- - type: cosine_precision
73
- value: 0.9661201195760486
74
- name: Cosine Precision
75
- - type: cosine_recall
76
- value: 0.9817842882294052
77
- name: Cosine Recall
78
- - type: cosine_ap
79
- value: 0.9950493119597241
80
- name: Cosine Ap
81
- - type: cosine_mcc
82
- value: 0.9611601510291333
83
- name: Cosine Mcc
84
- ---
85
-
86
- # sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2-name-matcher-original
87
-
88
- This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
89
-
90
- ## Model Details
91
-
92
- ### Model Description
93
- - **Model Type:** Sentence Transformer
94
- - **Base model:** [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) <!-- at revision 86741b4e3f5cb7765a600d3a3d55a0f6a6cb443d -->
95
- - **Maximum Sequence Length:** 128 tokens
96
- - **Output Dimensionality:** 384 dimensions
97
- - **Similarity Function:** Cosine Similarity
98
- <!-- - **Training Dataset:** Unknown -->
99
- - **Language:** en
100
- - **License:** apache-2.0
101
-
102
- ### Model Sources
103
-
104
- - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
105
- - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
106
- - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
107
-
108
- ### Full Model Architecture
109
-
110
- ```
111
- SentenceTransformer(
112
- (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel
113
- (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
114
- )
115
- ```
116
-
117
- ## Usage
118
-
119
- ### Direct Usage (Sentence Transformers)
120
-
121
- First install the Sentence Transformers library:
122
-
123
- ```bash
124
- pip install -U sentence-transformers
125
- ```
126
-
127
- Then you can load this model and run inference.
128
- ```python
129
- from sentence_transformers import SentenceTransformer
130
-
131
- # Download from the 🤗 Hub
132
- model = SentenceTransformer("sentence_transformers_model_id")
133
- # Run inference
134
- sentences = [
135
- 'Schori i Lidingö',
136
- 'Yordan Canev',
137
- 'ကားပေါ့ အန်နာတိုလီ',
138
- ]
139
- embeddings = model.encode(sentences)
140
- print(embeddings.shape)
141
- # [3, 384]
142
-
143
- # Get the similarity scores for the embeddings
144
- similarities = model.similarity(embeddings, embeddings)
145
- print(similarities.shape)
146
- # [3, 3]
147
- ```
148
-
149
- <!--
150
- ### Direct Usage (Transformers)
151
-
152
- <details><summary>Click to see the direct usage in Transformers</summary>
153
-
154
- </details>
155
- -->
156
-
157
- <!--
158
- ### Downstream Usage (Sentence Transformers)
159
-
160
- You can finetune this model on your own dataset.
161
-
162
- <details><summary>Click to expand</summary>
163
-
164
- </details>
165
- -->
166
-
167
- <!--
168
- ### Out-of-Scope Use
169
-
170
- *List how the model may foreseeably be misused and address what users ought not to do with the model.*
171
- -->
172
-
173
- ## Evaluation
174
-
175
- ### Metrics
176
-
177
- #### Binary Classification
178
-
179
- * Dataset: `sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2`
180
- * Evaluated with [<code>BinaryClassificationEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.BinaryClassificationEvaluator)
181
-
182
- | Metric | Value |
183
- |:--------------------------|:----------|
184
- | cosine_accuracy | 0.9829 |
185
- | cosine_accuracy_threshold | 0.7553 |
186
- | cosine_f1 | 0.9739 |
187
- | cosine_f1_threshold | 0.7402 |
188
- | cosine_precision | 0.9661 |
189
- | cosine_recall | 0.9818 |
190
- | **cosine_ap** | **0.995** |
191
- | cosine_mcc | 0.9612 |
192
-
193
- <!--
194
- ## Bias, Risks and Limitations
195
-
196
- *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
197
- -->
198
-
199
- <!--
200
- ### Recommendations
201
-
202
- *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
203
- -->
204
-
205
- ## Training Details
206
-
207
- ### Training Dataset
208
-
209
- #### Unnamed Dataset
210
-
211
- * Size: 2,130,621 training samples
212
- * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
213
- * Approximate statistics based on the first 1000 samples:
214
- | | sentence1 | sentence2 | label |
215
- |:--------|:---------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|:---------------------------------------------------------------|
216
- | type | string | string | float |
217
- | details | <ul><li>min: 3 tokens</li><li>mean: 9.32 tokens</li><li>max: 57 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 9.16 tokens</li><li>max: 54 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.34</li><li>max: 1.0</li></ul> |
218
- * Samples:
219
- | sentence1 | sentence2 | label |
220
- |:----------------------------------|:------------------------------------|:-----------------|
221
- | <code>캐스린 설리번</code> | <code>Kathryn D. Sullivanová</code> | <code>1.0</code> |
222
- | <code>ଶିବରାଜ ଅଧାଲରାଓ ପାଟିଲ</code> | <code>Aleksander Lubocki</code> | <code>0.0</code> |
223
- | <code>Пырванов, Георги</code> | <code>アナトーリー・セルジュコフ</code> | <code>0.0</code> |
224
- * Loss: [<code>ContrastiveLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#contrastiveloss) with these parameters:
225
- ```json
226
- {
227
- "distance_metric": "SiameseDistanceMetric.COSINE_DISTANCE",
228
- "margin": 0.5,
229
- "size_average": true
230
- }
231
- ```
232
-
233
- ### Evaluation Dataset
234
-
235
- #### Unnamed Dataset
236
-
237
- * Size: 2,663,276 evaluation samples
238
- * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
239
- * Approximate statistics based on the first 1000 samples:
240
- | | sentence1 | sentence2 | label |
241
- |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:---------------------------------------------------------------|
242
- | type | string | string | float |
243
- | details | <ul><li>min: 3 tokens</li><li>mean: 9.34 tokens</li><li>max: 102 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 9.11 tokens</li><li>max: 100 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.33</li><li>max: 1.0</li></ul> |
244
- * Samples:
245
- | sentence1 | sentence2 | label |
246
- |:--------------------------------------|:---------------------------------------|:-----------------|
247
- | <code>Ева Херман</code> | <code>I Xuan Karlos</code> | <code>0.0</code> |
248
- | <code>Кличков Андрій Євгенович</code> | <code>Андрэй Яўгенавіч Клычкоў</code> | <code>1.0</code> |
249
- | <code>Кинах А.</code> | <code>Senator John Hickenlooper</code> | <code>0.0</code> |
250
- * Loss: [<code>ContrastiveLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#contrastiveloss) with these parameters:
251
- ```json
252
- {
253
- "distance_metric": "SiameseDistanceMetric.COSINE_DISTANCE",
254
- "margin": 0.5,
255
- "size_average": true
256
- }
257
- ```
258
-
259
- ### Training Hyperparameters
260
- #### Non-Default Hyperparameters
261
-
262
- - `eval_strategy`: steps
263
- - `per_device_train_batch_size`: 1000
264
- - `per_device_eval_batch_size`: 1000
265
- - `gradient_accumulation_steps`: 4
266
- - `learning_rate`: 3e-05
267
- - `weight_decay`: 0.01
268
- - `num_train_epochs`: 8
269
- - `warmup_ratio`: 0.1
270
- - `fp16_opt_level`: O0
271
- - `load_best_model_at_end`: True
272
- - `optim`: adafactor
273
-
274
- #### All Hyperparameters
275
- <details><summary>Click to expand</summary>
276
-
277
- - `overwrite_output_dir`: False
278
- - `do_predict`: False
279
- - `eval_strategy`: steps
280
- - `prediction_loss_only`: True
281
- - `per_device_train_batch_size`: 1000
282
- - `per_device_eval_batch_size`: 1000
283
- - `per_gpu_train_batch_size`: None
284
- - `per_gpu_eval_batch_size`: None
285
- - `gradient_accumulation_steps`: 4
286
- - `eval_accumulation_steps`: None
287
- - `torch_empty_cache_steps`: None
288
- - `learning_rate`: 3e-05
289
- - `weight_decay`: 0.01
290
- - `adam_beta1`: 0.9
291
- - `adam_beta2`: 0.999
292
- - `adam_epsilon`: 1e-08
293
- - `max_grad_norm`: 1.0
294
- - `num_train_epochs`: 8
295
- - `max_steps`: -1
296
- - `lr_scheduler_type`: linear
297
- - `lr_scheduler_kwargs`: {}
298
- - `warmup_ratio`: 0.1
299
- - `warmup_steps`: 0
300
- - `log_level`: passive
301
- - `log_level_replica`: warning
302
- - `log_on_each_node`: True
303
- - `logging_nan_inf_filter`: True
304
- - `save_safetensors`: True
305
- - `save_on_each_node`: False
306
- - `save_only_model`: False
307
- - `restore_callback_states_from_checkpoint`: False
308
- - `no_cuda`: False
309
- - `use_cpu`: False
310
- - `use_mps_device`: False
311
- - `seed`: 42
312
- - `data_seed`: None
313
- - `jit_mode_eval`: False
314
- - `use_ipex`: False
315
- - `bf16`: False
316
- - `fp16`: False
317
- - `fp16_opt_level`: O0
318
- - `half_precision_backend`: auto
319
- - `bf16_full_eval`: False
320
- - `fp16_full_eval`: False
321
- - `tf32`: None
322
- - `local_rank`: 0
323
- - `ddp_backend`: None
- - `tpu_num_cores`: None
- - `tpu_metrics_debug`: False
- - `debug`: []
- - `dataloader_drop_last`: False
- - `dataloader_num_workers`: 0
- - `dataloader_prefetch_factor`: None
- - `past_index`: -1
- - `disable_tqdm`: False
- - `remove_unused_columns`: True
- - `label_names`: None
- - `load_best_model_at_end`: True
- - `ignore_data_skip`: False
- - `fsdp`: []
- - `fsdp_min_num_params`: 0
- - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- - `tp_size`: 0
- - `fsdp_transformer_layer_cls_to_wrap`: None
- - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- - `deepspeed`: None
- - `label_smoothing_factor`: 0.0
- - `optim`: adafactor
- - `optim_args`: None
- - `adafactor`: False
- - `group_by_length`: False
- - `length_column_name`: length
- - `ddp_find_unused_parameters`: None
- - `ddp_bucket_cap_mb`: None
- - `ddp_broadcast_buffers`: False
- - `dataloader_pin_memory`: True
- - `dataloader_persistent_workers`: False
- - `skip_memory_metrics`: True
- - `use_legacy_prediction_loop`: False
- - `push_to_hub`: False
- - `resume_from_checkpoint`: None
- - `hub_model_id`: None
- - `hub_strategy`: every_save
- - `hub_private_repo`: None
- - `hub_always_push`: False
- - `gradient_checkpointing`: False
- - `gradient_checkpointing_kwargs`: None
- - `include_inputs_for_metrics`: False
- - `include_for_metrics`: []
- - `eval_do_concat_batches`: True
- - `fp16_backend`: auto
- - `push_to_hub_model_id`: None
- - `push_to_hub_organization`: None
- - `mp_parameters`:
- - `auto_find_batch_size`: False
- - `full_determinism`: False
- - `torchdynamo`: None
- - `ray_scope`: last
- - `ddp_timeout`: 1800
- - `torch_compile`: False
- - `torch_compile_backend`: None
- - `torch_compile_mode`: None
- - `include_tokens_per_second`: False
- - `include_num_input_tokens_seen`: False
- - `neftune_noise_alpha`: None
- - `optim_target_modules`: None
- - `batch_eval_metrics`: False
- - `eval_on_start`: False
- - `use_liger_kernel`: False
- - `eval_use_gather_object`: False
- - `average_tokens_across_devices`: False
- - `prompts`: None
- - `batch_sampler`: batch_sampler
- - `multi_dataset_batch_sampler`: proportional
-
- </details>
-
- ### Training Logs
- | Epoch  | Step | Training Loss | Validation Loss | sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap |
- |:------:|:----:|:-------------:|:---------------:|:---------------------------------------------------------------------:|
- | -1     | -1   | -             | -               | 0.7140                                                                 |
- | 0.1877 | 100  | -             | 0.0125          | 0.8849                                                                 |
- | 0.3754 | 200  | -             | 0.0090          | 0.9369                                                                 |
- | 0.5631 | 300  | -             | 0.0068          | 0.9630                                                                 |
- | 0.7508 | 400  | -             | 0.0052          | 0.9774                                                                 |
- | 0.9385 | 500  | 0.0409        | 0.0040          | 0.9845                                                                 |
- | 1.1276 | 600  | -             | 0.0033          | 0.9887                                                                 |
- | 1.3153 | 700  | -             | 0.0028          | 0.9911                                                                 |
- | 1.5031 | 800  | -             | 0.0026          | 0.9927                                                                 |
- | 1.6908 | 900  | -             | 0.0022          | 0.9938                                                                 |
- | 1.8785 | 1000 | 0.0131        | 0.0022          | 0.9944                                                                 |
- | 2.0676 | 1100 | -             | 0.0019          | 0.9950                                                                 |
-
-
- ### Framework Versions
- - Python: 3.12.9
- - Sentence Transformers: 3.4.1
- - Transformers: 4.51.3
- - PyTorch: 2.7.0+cu126
- - Accelerate: 1.6.0
- - Datasets: 3.6.0
- - Tokenizers: 0.21.1
-
- ## Citation
-
- ### BibTeX
-
- #### Sentence Transformers
- ```bibtex
- @inproceedings{reimers-2019-sentence-bert,
-     title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
-     author = "Reimers, Nils and Gurevych, Iryna",
-     booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
-     month = "11",
-     year = "2019",
-     publisher = "Association for Computational Linguistics",
-     url = "https://arxiv.org/abs/1908.10084",
- }
- ```
-
- #### ContrastiveLoss
- ```bibtex
- @inproceedings{hadsell2006dimensionality,
-     author={Hadsell, R. and Chopra, S. and LeCun, Y.},
-     booktitle={2006 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR'06)},
-     title={Dimensionality Reduction by Learning an Invariant Mapping},
-     year={2006},
-     volume={2},
-     number={},
-     pages={1735-1742},
-     doi={10.1109/CVPR.2006.100}
- }
- ```
-
- <!--
- ## Glossary
-
- *Clearly define terms in order to be accessible across audiences.*
- -->
-
- <!--
- ## Model Card Authors
-
- *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
- -->
-
- <!--
- ## Model Card Contact
-
- *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
- -->
 
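For reference, the non-default hyperparameters in the deleted card above map one-to-one onto the sentence-transformers 3.x training API. A minimal sketch, assuming `SentenceTransformerTrainingArguments` (the `transformers.TrainingArguments` subclass shipped with Sentence Transformers 3.4); the `output_dir` value is a hypothetical placeholder:

```python
from sentence_transformers.training_args import SentenceTransformerTrainingArguments

# Non-default values from the deleted model card; eval_steps/save_steps come
# from the trainer_state.json diffs below. Every other field keeps its
# transformers default.
args = SentenceTransformerTrainingArguments(
    output_dir="data/fine-tuned-sbert",  # hypothetical path
    eval_strategy="steps",
    eval_steps=100,
    save_steps=100,
    per_device_train_batch_size=1000,
    per_device_eval_batch_size=1000,
    gradient_accumulation_steps=4,
    learning_rate=3e-5,
    weight_decay=0.01,
    num_train_epochs=8,
    warmup_ratio=0.1,
    fp16_opt_level="O0",
    load_best_model_at_end=True,
    optim="adafactor",
)
```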
checkpoint-1100/config.json DELETED
@@ -1,25 +0,0 @@
- {
-   "architectures": [
-     "BertModel"
-   ],
-   "attention_probs_dropout_prob": 0.1,
-   "classifier_dropout": null,
-   "gradient_checkpointing": false,
-   "hidden_act": "gelu",
-   "hidden_dropout_prob": 0.1,
-   "hidden_size": 384,
-   "initializer_range": 0.02,
-   "intermediate_size": 1536,
-   "layer_norm_eps": 1e-12,
-   "max_position_embeddings": 512,
-   "model_type": "bert",
-   "num_attention_heads": 12,
-   "num_hidden_layers": 12,
-   "pad_token_id": 0,
-   "position_embedding_type": "absolute",
-   "torch_dtype": "float32",
-   "transformers_version": "4.51.3",
-   "type_vocab_size": 2,
-   "use_cache": true,
-   "vocab_size": 250037
- }
 
checkpoint-1100/config_sentence_transformers.json DELETED
@@ -1,10 +0,0 @@
- {
-   "__version__": {
-     "sentence_transformers": "3.4.1",
-     "transformers": "4.51.3",
-     "pytorch": "2.7.0+cu126"
-   },
-   "prompts": {},
-   "default_prompt_name": null,
-   "similarity_fn_name": "cosine"
- }
 
checkpoint-1100/model.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:763540a5075ed486170f85323b5ee9b40182439ea8f51d889e8674424cce13c2
- size 470637416
 
checkpoint-1100/modules.json DELETED
@@ -1,14 +0,0 @@
- [
-   {
-     "idx": 0,
-     "name": "0",
-     "path": "",
-     "type": "sentence_transformers.models.Transformer"
-   },
-   {
-     "idx": 1,
-     "name": "1",
-     "path": "1_Pooling",
-     "type": "sentence_transformers.models.Pooling"
-   }
- ]
 
checkpoint-1100/optimizer.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:e0711e7f2b2b4728583424781a68346e4cb105f82c5b3e33e835ff6603b1b546
- size 1715019
 
checkpoint-1100/rng_state.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:d533d8579fdbb2634c2232f32ae13c2e79a071512c8f417a9f5453a5c0587c27
- size 14645
 
checkpoint-1100/scheduler.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:2a3efc25b9c32ace074d8642ed698ba4f27854c75c4022587a44f288f2399a9b
- size 1465
 
checkpoint-1100/sentence_bert_config.json DELETED
@@ -1,4 +0,0 @@
- {
-   "max_seq_length": 128,
-   "do_lower_case": false
- }
 
checkpoint-1100/special_tokens_map.json DELETED
@@ -1,51 +0,0 @@
- {
-   "bos_token": {
-     "content": "<s>",
-     "lstrip": false,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false
-   },
-   "cls_token": {
-     "content": "<s>",
-     "lstrip": false,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false
-   },
-   "eos_token": {
-     "content": "</s>",
-     "lstrip": false,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false
-   },
-   "mask_token": {
-     "content": "<mask>",
-     "lstrip": true,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false
-   },
-   "pad_token": {
-     "content": "<pad>",
-     "lstrip": false,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false
-   },
-   "sep_token": {
-     "content": "</s>",
-     "lstrip": false,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false
-   },
-   "unk_token": {
-     "content": "<unk>",
-     "lstrip": false,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false
-   }
- }
 
checkpoint-1100/tokenizer.json DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:cad551d5600a84242d0973327029452a1e3672ba6313c2a3c3d69c4310e12719
- size 17082987
 
checkpoint-1100/tokenizer_config.json DELETED
@@ -1,65 +0,0 @@
- {
-   "added_tokens_decoder": {
-     "0": {
-       "content": "<s>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "1": {
-       "content": "<pad>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "2": {
-       "content": "</s>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "3": {
-       "content": "<unk>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "250001": {
-       "content": "<mask>",
-       "lstrip": true,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     }
-   },
-   "bos_token": "<s>",
-   "clean_up_tokenization_spaces": false,
-   "cls_token": "<s>",
-   "do_lower_case": true,
-   "eos_token": "</s>",
-   "extra_special_tokens": {},
-   "mask_token": "<mask>",
-   "max_length": 128,
-   "model_max_length": 128,
-   "pad_to_multiple_of": null,
-   "pad_token": "<pad>",
-   "pad_token_type_id": 0,
-   "padding_side": "right",
-   "sep_token": "</s>",
-   "stride": 0,
-   "strip_accents": null,
-   "tokenize_chinese_chars": true,
-   "tokenizer_class": "BertTokenizer",
-   "truncation_side": "right",
-   "truncation_strategy": "longest_first",
-   "unk_token": "<unk>"
- }
 
checkpoint-1100/trainer_state.json DELETED
@@ -1,233 +0,0 @@
- {
-   "best_global_step": 1100,
-   "best_metric": 0.0018734760815277696,
-   "best_model_checkpoint": "data/fine-tuned-sbert-sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2-original-adafactor/checkpoint-1100",
-   "epoch": 2.0675739089629284,
-   "eval_steps": 100,
-   "global_step": 1100,
-   "is_hyper_param_search": false,
-   "is_local_process_zero": true,
-   "is_world_process_zero": true,
-   "log_history": [
-     {
-       "epoch": 0.18770530267480057,
-       "eval_loss": 0.012530049309134483,
-       "eval_runtime": 812.6802,
-       "eval_samples_per_second": 3277.151,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.8778235859541618,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7128396034240723,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.8848748516159781,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.812583495899967,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.6880456209182739,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.7185793630359445,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.7900823930955021,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.8364038065429271,
-       "eval_steps_per_second": 3.278,
-       "step": 100
-     },
-     {
-       "epoch": 0.37541060534960113,
-       "eval_loss": 0.009013425558805466,
-       "eval_runtime": 792.9843,
-       "eval_samples_per_second": 3358.548,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9164113424048541,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7378441095352173,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9368603114664952,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.8729798695775446,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7272344827651978,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.8103205315460159,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.8605654745268148,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.8857576838544123,
-       "eval_steps_per_second": 3.359,
-       "step": 200
-     },
-     {
-       "epoch": 0.5631159080244017,
-       "eval_loss": 0.006819029338657856,
-       "eval_runtime": 809.9704,
-       "eval_samples_per_second": 3288.115,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9398298338890391,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7449667453765869,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9629957356284182,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9088032597499417,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7449667453765869,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.864029341509194,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.8990159430733201,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9188060251084542,
-       "eval_steps_per_second": 3.289,
-       "step": 300
-     },
-     {
-       "epoch": 0.7508212106992023,
-       "eval_loss": 0.005150709766894579,
-       "eval_runtime": 797.9199,
-       "eval_samples_per_second": 3337.773,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9560016220600163,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7553268671035767,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9774059659768239,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9333702119012406,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7449506521224976,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9005457325671423,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.916037892637527,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9513710688929036,
-       "eval_steps_per_second": 3.339,
-       "step": 400
-     },
-     {
-       "epoch": 0.9385265133740028,
-       "grad_norm": 0.17396493256092072,
-       "learning_rate": 2.9428198433420364e-05,
-       "loss": 0.0409,
-       "step": 500
-     },
-     {
-       "epoch": 0.9385265133740028,
-       "eval_loss": 0.003973629325628281,
-       "eval_runtime": 809.4532,
-       "eval_samples_per_second": 3290.216,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9655950557207654,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7622435092926025,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9845099503823473,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9477742208778024,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7535413503646851,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9221773981286795,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9367750202319935,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9590347859107281,
-       "eval_steps_per_second": 3.291,
-       "step": 500
-     },
-     {
-       "epoch": 1.1276396058188645,
-       "eval_loss": 0.0032712339889258146,
-       "eval_runtime": 793.7573,
-       "eval_samples_per_second": 3355.277,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9712722657775374,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7610360383987427,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9887055977101925,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9564087809158087,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7610177993774414,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9350876149915242,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9471753898932449,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9658239646502422,
-       "eval_steps_per_second": 3.356,
-       "step": 600
-     },
-     {
-       "epoch": 1.3153449084936648,
-       "eval_loss": 0.0028166945558041334,
-       "eval_runtime": 815.1943,
-       "eval_samples_per_second": 3267.044,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9751246583160614,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7577522993087769,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9911117019106511,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9621558129059113,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7424367666244507,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.943665667488554,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9536134909690983,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9708525597505264,
-       "eval_steps_per_second": 3.268,
-       "step": 700
-     },
-     {
-       "epoch": 1.5030502111684654,
-       "eval_loss": 0.0026242006570100784,
-       "eval_runtime": 805.7115,
-       "eval_samples_per_second": 3305.496,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9782673995974888,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7254683971405029,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9927214598054878,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9669240257663667,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7145971059799194,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9507846488068235,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9597660102710608,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9741896137072368,
-       "eval_steps_per_second": 3.306,
-       "step": 800
-     },
-     {
-       "epoch": 1.690755513843266,
-       "eval_loss": 0.002248650649562478,
-       "eval_runtime": 818.5338,
-       "eval_samples_per_second": 3253.715,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9801973506353069,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7349117994308472,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9938133122786723,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9698356230196407,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7348856329917908,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9551340483533577,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9641228578901284,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9756164919507957,
-       "eval_steps_per_second": 3.255,
-       "step": 900
-     },
-     {
-       "epoch": 1.8784608165180665,
-       "grad_norm": 0.07541557401418686,
-       "learning_rate": 2.5511749347258486e-05,
-       "loss": 0.0131,
-       "step": 1000
-     },
-     {
-       "epoch": 1.8784608165180665,
-       "eval_loss": 0.002240537665784359,
-       "eval_runtime": 803.6286,
-       "eval_samples_per_second": 3314.063,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9817931272716349,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7197962999343872,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9944127523785896,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9722373310278887,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7091608047485352,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9587183163648803,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9675121928984912,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9770088489465266,
-       "eval_steps_per_second": 3.315,
-       "step": 1000
-     },
-     {
-       "epoch": 2.0675739089629284,
-       "eval_loss": 0.0018734760815277696,
-       "eval_runtime": 807.0812,
-       "eval_samples_per_second": 3299.886,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9828594815415578,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7552986741065979,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9950493119597241,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.973889221813201,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7401974201202393,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9611601510291333,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9661201195760486,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9817842882294052,
-       "eval_steps_per_second": 3.301,
-       "step": 1100
-     }
-   ],
-   "logging_steps": 500,
-   "max_steps": 4256,
-   "num_input_tokens_seen": 0,
-   "num_train_epochs": 8,
-   "save_steps": 100,
-   "stateful_callbacks": {
-     "EarlyStoppingCallback": {
-       "args": {
-         "early_stopping_patience": 1,
-         "early_stopping_threshold": 0.0
-       },
-       "attributes": {
-         "early_stopping_patience_counter": 0
-       }
-     },
-     "TrainerControl": {
-       "args": {
-         "should_epoch_stop": false,
-         "should_evaluate": false,
-         "should_log": false,
-         "should_save": true,
-         "should_training_stop": false
-       },
-       "attributes": {}
-     }
-   },
-   "total_flos": 0.0,
-   "train_batch_size": 1000,
-   "trial_name": null,
-   "trial_params": null
- }
 
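The `stateful_callbacks` block in the trainer state above records an `EarlyStoppingCallback` with patience 1 and threshold 0.0, which is why a run configured for 8 epochs stops shortly after the eval loss plateaus. A sketch of how that callback is typically wired into the sentence-transformers v3 trainer; the inline dataset is a stand-in built from the card's own sample pairs, and the `output_dir` is hypothetical, not values recovered from this commit:

```python
from datasets import Dataset
from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer, losses
from sentence_transformers.training_args import SentenceTransformerTrainingArguments
from transformers import EarlyStoppingCallback

model = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
loss = losses.ContrastiveLoss(model=model, margin=0.5)

# Stand-in name pairs (label 1.0 = same entity, 0.0 = different entities).
pairs = Dataset.from_dict({
    "sentence1": ["캐스린 설리번", "Пырванов, Георги"],
    "sentence2": ["Kathryn D. Sullivanová", "アナトーリー・セルジュコフ"],
    "label": [1.0, 0.0],
})

# load_best_model_at_end and metric_for_best_model are required for the callback.
args = SentenceTransformerTrainingArguments(
    output_dir="tmp-run",  # hypothetical
    eval_strategy="steps",
    eval_steps=100,
    save_steps=100,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
)

trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=pairs,
    eval_dataset=pairs,
    loss=loss,
    # Stop as soon as eval loss fails to improve once, matching the recorded state.
    callbacks=[EarlyStoppingCallback(early_stopping_patience=1, early_stopping_threshold=0.0)],
)
trainer.train()
```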
checkpoint-1100/training_args.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:9339753774865faea550d7da93688221ca0f43171c16e3034645a2149992c8a6
- size 6033
 
checkpoint-1100/unigram.json DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:da145b5e7700ae40f16691ec32a0b1fdc1ee3298db22a31ea55f57a966c4a65d
- size 14763260
 
checkpoint-1200/1_Pooling/config.json DELETED
@@ -1,10 +0,0 @@
- {
-   "word_embedding_dimension": 384,
-   "pooling_mode_cls_token": false,
-   "pooling_mode_mean_tokens": true,
-   "pooling_mode_max_tokens": false,
-   "pooling_mode_mean_sqrt_len_tokens": false,
-   "pooling_mode_weightedmean_tokens": false,
-   "pooling_mode_lasttoken": false,
-   "include_prompt": true
- }
 
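The pooling configuration above enables only `pooling_mode_mean_tokens`: sentence embeddings are the masked mean of the 384-dimensional token embeddings. A minimal sketch of that operation in plain PyTorch (the function name is ours, not part of the deleted files):

```python
import torch

def mean_pool(token_embeddings: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    """Masked mean pooling: average token vectors while ignoring padding."""
    # (batch, seq_len) -> (batch, seq_len, 1) so the mask broadcasts over the
    # embedding dimension.
    mask = attention_mask.unsqueeze(-1).to(token_embeddings.dtype)
    summed = (token_embeddings * mask).sum(dim=1)  # zero out pads, sum over tokens
    counts = mask.sum(dim=1).clamp(min=1e-9)       # number of real tokens per row
    return summed / counts                         # (batch, 384) sentence embeddings
```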
checkpoint-1200/README.md DELETED
@@ -1,468 +0,0 @@
- ---
- language:
- - en
- license: apache-2.0
- tags:
- - sentence-transformers
- - sentence-similarity
- - feature-extraction
- - generated_from_trainer
- - dataset_size:2130621
- - loss:ContrastiveLoss
- base_model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
- widget:
- - source_sentence: Kim Chol-sam
-   sentences:
-   - Stankevich Sergey Nikolayevich
-   - Kim Chin-So’k
-   - Julen Lopetegui Agote
- - source_sentence: دينا بنت عبد الحميد
-   sentences:
-   - Alexia van Amsberg
-   - Anthony Nicholas Colin Maitland Biddulph, 5th Baron Biddulph
-   - Dina bint Abdul-Hamíd
- - source_sentence: Մուհամեդ բեն Նաիֆ Ալ Սաուդ
-   sentences:
-   - Karpov Anatoly Evgenyevich
-   - GNPower Mariveles Coal Plant [former]
-   - Muhammed bin Nayef bin Abdul Aziz Al Saud
- - source_sentence: Edward Gnehm
-   sentences:
-   - Шауэрте, Хартмут
-   - Ханзада Филипп, Эдинбург герцогі
-   - AFX
- - source_sentence: Schori i Lidingö
-   sentences:
-   - Yordan Canev
-   - ကားပေါ့ အန်နာတိုလီ
-   - BYSTROV, Mikhail Ivanovich
- pipeline_tag: sentence-similarity
- library_name: sentence-transformers
- metrics:
- - cosine_accuracy
- - cosine_accuracy_threshold
- - cosine_f1
- - cosine_f1_threshold
- - cosine_precision
- - cosine_recall
- - cosine_ap
- - cosine_mcc
- model-index:
- - name: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2-name-matcher-original
-   results:
-   - task:
-       type: binary-classification
-       name: Binary Classification
-     dataset:
-       name: sentence transformers paraphrase multilingual MiniLM L12 v2
-       type: sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2
-     metrics:
-     - type: cosine_accuracy
-       value: 0.9843050674356433
-       name: Cosine Accuracy
-     - type: cosine_accuracy_threshold
-       value: 0.742120623588562
-       name: Cosine Accuracy Threshold
-     - type: cosine_f1
-       value: 0.9760932477723254
-       name: Cosine F1
-     - type: cosine_f1_threshold
-       value: 0.742120623588562
-       name: Cosine F1 Threshold
-     - type: cosine_precision
-       value: 0.9703216856372878
-       name: Cosine Precision
-     - type: cosine_recall
-       value: 0.9819338803033267
-       name: Cosine Recall
-     - type: cosine_ap
-       value: 0.9955554741842152
-       name: Cosine Ap
-     - type: cosine_mcc
-       value: 0.964449493634366
-       name: Cosine Mcc
- ---
-
- # sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2-name-matcher-original
-
- This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
-
- ## Model Details
-
- ### Model Description
- - **Model Type:** Sentence Transformer
- - **Base model:** [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) <!-- at revision 86741b4e3f5cb7765a600d3a3d55a0f6a6cb443d -->
- - **Maximum Sequence Length:** 128 tokens
- - **Output Dimensionality:** 384 dimensions
- - **Similarity Function:** Cosine Similarity
- <!-- - **Training Dataset:** Unknown -->
- - **Language:** en
- - **License:** apache-2.0
-
- ### Model Sources
-
- - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
-
- ### Full Model Architecture
-
- ```
- SentenceTransformer(
-   (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel
-   (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
- )
- ```
-
- ## Usage
-
- ### Direct Usage (Sentence Transformers)
-
- First install the Sentence Transformers library:
-
- ```bash
- pip install -U sentence-transformers
- ```
-
- Then you can load this model and run inference.
- ```python
- from sentence_transformers import SentenceTransformer
-
- # Download from the 🤗 Hub
- model = SentenceTransformer("sentence_transformers_model_id")
- # Run inference
- sentences = [
-     'Schori i Lidingö',
-     'Yordan Canev',
-     'ကားပေါ့ အန်နာတိုလီ',
- ]
- embeddings = model.encode(sentences)
- print(embeddings.shape)
- # [3, 384]
-
- # Get the similarity scores for the embeddings
- similarities = model.similarity(embeddings, embeddings)
- print(similarities.shape)
- # [3, 3]
- ```
-
- <!--
- ### Direct Usage (Transformers)
-
- <details><summary>Click to see the direct usage in Transformers</summary>
-
- </details>
- -->
-
- <!--
- ### Downstream Usage (Sentence Transformers)
-
- You can finetune this model on your own dataset.
-
- <details><summary>Click to expand</summary>
-
- </details>
- -->
-
- <!--
- ### Out-of-Scope Use
-
- *List how the model may foreseeably be misused and address what users ought not to do with the model.*
- -->
-
- ## Evaluation
-
- ### Metrics
-
- #### Binary Classification
-
- * Dataset: `sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2`
- * Evaluated with [<code>BinaryClassificationEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.BinaryClassificationEvaluator)
-
- | Metric                    | Value      |
- |:--------------------------|:-----------|
- | cosine_accuracy           | 0.9843     |
- | cosine_accuracy_threshold | 0.7421     |
- | cosine_f1                 | 0.9761     |
- | cosine_f1_threshold       | 0.7421     |
- | cosine_precision          | 0.9703     |
- | cosine_recall             | 0.9819     |
- | **cosine_ap**             | **0.9956** |
- | cosine_mcc                | 0.9644     |
-
- <!--
- ## Bias, Risks and Limitations
-
- *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
- -->
-
- <!--
- ### Recommendations
-
- *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
- -->
-
- ## Training Details
-
- ### Training Dataset
-
- #### Unnamed Dataset
-
- * Size: 2,130,621 training samples
- * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
- * Approximate statistics based on the first 1000 samples:
-   |         | sentence1                                                                          | sentence2                                                                          | label                                                           |
-   |:--------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------|
-   | type    | string                                                                             | string                                                                             | float                                                           |
-   | details | <ul><li>min: 3 tokens</li><li>mean: 9.32 tokens</li><li>max: 57 tokens</li></ul>  | <ul><li>min: 3 tokens</li><li>mean: 9.16 tokens</li><li>max: 54 tokens</li></ul>  | <ul><li>min: 0.0</li><li>mean: 0.34</li><li>max: 1.0</li></ul> |
- * Samples:
-   | sentence1                          | sentence2                            | label            |
-   |:-----------------------------------|:--------------------------------------|:-------------------|
-   | <code>캐스린 설리번</code>          | <code>Kathryn D. Sullivanová</code>  | <code>1.0</code> |
-   | <code>ଶିବରାଜ ଅଧାଲରାଓ ପାଟିଲ</code>  | <code>Aleksander Lubocki</code>      | <code>0.0</code> |
-   | <code>Пырванов, Георги</code>      | <code>アナトーリー・セルジュコフ</code> | <code>0.0</code> |
- * Loss: [<code>ContrastiveLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#contrastiveloss) with these parameters:
-   ```json
-   {
-       "distance_metric": "SiameseDistanceMetric.COSINE_DISTANCE",
-       "margin": 0.5,
-       "size_average": true
-   }
-   ```
-
- ### Evaluation Dataset
-
- #### Unnamed Dataset
-
- * Size: 2,663,276 evaluation samples
- * Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
- * Approximate statistics based on the first 1000 samples:
-   |         | sentence1                                                                           | sentence2                                                                           | label                                                           |
-   |:--------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|:-----------------------------------------------------------------|
-   | type    | string                                                                              | string                                                                              | float                                                           |
-   | details | <ul><li>min: 3 tokens</li><li>mean: 9.34 tokens</li><li>max: 102 tokens</li></ul>  | <ul><li>min: 4 tokens</li><li>mean: 9.11 tokens</li><li>max: 100 tokens</li></ul>  | <ul><li>min: 0.0</li><li>mean: 0.33</li><li>max: 1.0</li></ul> |
- * Samples:
-   | sentence1                              | sentence2                               | label            |
-   |:----------------------------------------|:-----------------------------------------|:-------------------|
-   | <code>Ева Херман</code>                | <code>I Xuan Karlos</code>              | <code>0.0</code> |
-   | <code>Кличков Андрій Євгенович</code>  | <code>Андрэй Яўгенавіч Клычкоў</code>   | <code>1.0</code> |
-   | <code>Кинах А.</code>                  | <code>Senator John Hickenlooper</code>  | <code>0.0</code> |
- * Loss: [<code>ContrastiveLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#contrastiveloss) with these parameters:
-   ```json
-   {
-       "distance_metric": "SiameseDistanceMetric.COSINE_DISTANCE",
-       "margin": 0.5,
-       "size_average": true
-   }
-   ```
-
- ### Training Hyperparameters
- #### Non-Default Hyperparameters
-
- - `eval_strategy`: steps
- - `per_device_train_batch_size`: 1000
- - `per_device_eval_batch_size`: 1000
- - `gradient_accumulation_steps`: 4
- - `learning_rate`: 3e-05
- - `weight_decay`: 0.01
- - `num_train_epochs`: 8
- - `warmup_ratio`: 0.1
- - `fp16_opt_level`: O0
- - `load_best_model_at_end`: True
- - `optim`: adafactor
-
- #### All Hyperparameters
- <details><summary>Click to expand</summary>
-
- - `overwrite_output_dir`: False
- - `do_predict`: False
- - `eval_strategy`: steps
- - `prediction_loss_only`: True
- - `per_device_train_batch_size`: 1000
- - `per_device_eval_batch_size`: 1000
- - `per_gpu_train_batch_size`: None
- - `per_gpu_eval_batch_size`: None
- - `gradient_accumulation_steps`: 4
- - `eval_accumulation_steps`: None
- - `torch_empty_cache_steps`: None
- - `learning_rate`: 3e-05
- - `weight_decay`: 0.01
- - `adam_beta1`: 0.9
- - `adam_beta2`: 0.999
- - `adam_epsilon`: 1e-08
- - `max_grad_norm`: 1.0
- - `num_train_epochs`: 8
- - `max_steps`: -1
- - `lr_scheduler_type`: linear
- - `lr_scheduler_kwargs`: {}
- - `warmup_ratio`: 0.1
- - `warmup_steps`: 0
- - `log_level`: passive
- - `log_level_replica`: warning
- - `log_on_each_node`: True
- - `logging_nan_inf_filter`: True
- - `save_safetensors`: True
- - `save_on_each_node`: False
- - `save_only_model`: False
- - `restore_callback_states_from_checkpoint`: False
- - `no_cuda`: False
- - `use_cpu`: False
- - `use_mps_device`: False
- - `seed`: 42
- - `data_seed`: None
- - `jit_mode_eval`: False
- - `use_ipex`: False
- - `bf16`: False
- - `fp16`: False
- - `fp16_opt_level`: O0
- - `half_precision_backend`: auto
- - `bf16_full_eval`: False
- - `fp16_full_eval`: False
- - `tf32`: None
- - `local_rank`: 0
- - `ddp_backend`: None
- - `tpu_num_cores`: None
- - `tpu_metrics_debug`: False
- - `debug`: []
- - `dataloader_drop_last`: False
- - `dataloader_num_workers`: 0
- - `dataloader_prefetch_factor`: None
- - `past_index`: -1
- - `disable_tqdm`: False
- - `remove_unused_columns`: True
- - `label_names`: None
- - `load_best_model_at_end`: True
- - `ignore_data_skip`: False
- - `fsdp`: []
- - `fsdp_min_num_params`: 0
- - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- - `tp_size`: 0
- - `fsdp_transformer_layer_cls_to_wrap`: None
- - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- - `deepspeed`: None
- - `label_smoothing_factor`: 0.0
- - `optim`: adafactor
- - `optim_args`: None
- - `adafactor`: False
- - `group_by_length`: False
- - `length_column_name`: length
- - `ddp_find_unused_parameters`: None
- - `ddp_bucket_cap_mb`: None
- - `ddp_broadcast_buffers`: False
- - `dataloader_pin_memory`: True
- - `dataloader_persistent_workers`: False
- - `skip_memory_metrics`: True
- - `use_legacy_prediction_loop`: False
- - `push_to_hub`: False
- - `resume_from_checkpoint`: None
- - `hub_model_id`: None
- - `hub_strategy`: every_save
- - `hub_private_repo`: None
- - `hub_always_push`: False
- - `gradient_checkpointing`: False
- - `gradient_checkpointing_kwargs`: None
- - `include_inputs_for_metrics`: False
- - `include_for_metrics`: []
- - `eval_do_concat_batches`: True
- - `fp16_backend`: auto
- - `push_to_hub_model_id`: None
- - `push_to_hub_organization`: None
- - `mp_parameters`:
- - `auto_find_batch_size`: False
- - `full_determinism`: False
- - `torchdynamo`: None
- - `ray_scope`: last
- - `ddp_timeout`: 1800
- - `torch_compile`: False
- - `torch_compile_backend`: None
- - `torch_compile_mode`: None
- - `include_tokens_per_second`: False
- - `include_num_input_tokens_seen`: False
- - `neftune_noise_alpha`: None
- - `optim_target_modules`: None
- - `batch_eval_metrics`: False
- - `eval_on_start`: False
- - `use_liger_kernel`: False
- - `eval_use_gather_object`: False
- - `average_tokens_across_devices`: False
- - `prompts`: None
- - `batch_sampler`: batch_sampler
- - `multi_dataset_batch_sampler`: proportional
-
- </details>
-
- ### Training Logs
- | Epoch  | Step | Training Loss | Validation Loss | sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap |
- |:------:|:----:|:-------------:|:---------------:|:---------------------------------------------------------------------:|
- | -1     | -1   | -             | -               | 0.7140                                                                 |
- | 0.1877 | 100  | -             | 0.0125          | 0.8849                                                                 |
- | 0.3754 | 200  | -             | 0.0090          | 0.9369                                                                 |
- | 0.5631 | 300  | -             | 0.0068          | 0.9630                                                                 |
- | 0.7508 | 400  | -             | 0.0052          | 0.9774                                                                 |
- | 0.9385 | 500  | 0.0409        | 0.0040          | 0.9845                                                                 |
- | 1.1276 | 600  | -             | 0.0033          | 0.9887                                                                 |
- | 1.3153 | 700  | -             | 0.0028          | 0.9911                                                                 |
- | 1.5031 | 800  | -             | 0.0026          | 0.9927                                                                 |
- | 1.6908 | 900  | -             | 0.0022          | 0.9938                                                                 |
- | 1.8785 | 1000 | 0.0131        | 0.0022          | 0.9944                                                                 |
- | 2.0676 | 1100 | -             | 0.0019          | 0.9950                                                                 |
- | 2.2553 | 1200 | -             | 0.0017          | 0.9956                                                                 |
-
-
- ### Framework Versions
- - Python: 3.12.9
- - Sentence Transformers: 3.4.1
- - Transformers: 4.51.3
- - PyTorch: 2.7.0+cu126
- - Accelerate: 1.6.0
- - Datasets: 3.6.0
- - Tokenizers: 0.21.1
-
- ## Citation
-
- ### BibTeX
-
- #### Sentence Transformers
- ```bibtex
- @inproceedings{reimers-2019-sentence-bert,
-     title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
-     author = "Reimers, Nils and Gurevych, Iryna",
-     booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
-     month = "11",
-     year = "2019",
-     publisher = "Association for Computational Linguistics",
-     url = "https://arxiv.org/abs/1908.10084",
- }
- ```
-
- #### ContrastiveLoss
- ```bibtex
- @inproceedings{hadsell2006dimensionality,
-     author={Hadsell, R. and Chopra, S. and LeCun, Y.},
-     booktitle={2006 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR'06)},
-     title={Dimensionality Reduction by Learning an Invariant Mapping},
-     year={2006},
-     volume={2},
-     number={},
-     pages={1735-1742},
-     doi={10.1109/CVPR.2006.100}
- }
- ```
-
- <!--
- ## Glossary
-
- *Clearly define terms in order to be accessible across audiences.*
- -->
-
- <!--
- ## Model Card Authors
-
- *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
- -->
-
- <!--
- ## Model Card Contact
-
- *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
- -->
 
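The deleted card above pairs `ContrastiveLoss` (cosine distance, margin 0.5) for training with a `BinaryClassificationEvaluator` for the reported metrics. A minimal sketch of both pieces against the base model, reusing labeled pairs from the card's own evaluation samples; the evaluator `name` is a hypothetical label:

```python
from sentence_transformers import SentenceTransformer, losses
from sentence_transformers.evaluation import BinaryClassificationEvaluator

model = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")

# ContrastiveLoss as configured in the card: the default cosine distance
# metric, margin 0.5, size_average=True.
loss = losses.ContrastiveLoss(model=model, margin=0.5, size_average=True)

# Binary classification over labeled name pairs (1 = same entity, 0 = different).
evaluator = BinaryClassificationEvaluator(
    sentences1=["Кличков Андрій Євгенович", "Кинах А."],
    sentences2=["Андрэй Яўгенавіч Клычкоў", "Senator John Hickenlooper"],
    labels=[1, 0],
    name="name-matcher-dev",  # hypothetical
)
results = evaluator(model)  # dict of cosine accuracy, F1, AP, MCC and thresholds
print(results)
```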
checkpoint-1200/config.json DELETED
@@ -1,25 +0,0 @@
- {
-   "architectures": [
-     "BertModel"
-   ],
-   "attention_probs_dropout_prob": 0.1,
-   "classifier_dropout": null,
-   "gradient_checkpointing": false,
-   "hidden_act": "gelu",
-   "hidden_dropout_prob": 0.1,
-   "hidden_size": 384,
-   "initializer_range": 0.02,
-   "intermediate_size": 1536,
-   "layer_norm_eps": 1e-12,
-   "max_position_embeddings": 512,
-   "model_type": "bert",
-   "num_attention_heads": 12,
-   "num_hidden_layers": 12,
-   "pad_token_id": 0,
-   "position_embedding_type": "absolute",
-   "torch_dtype": "float32",
-   "transformers_version": "4.51.3",
-   "type_vocab_size": 2,
-   "use_cache": true,
-   "vocab_size": 250037
- }
 
checkpoint-1200/config_sentence_transformers.json DELETED
@@ -1,10 +0,0 @@
- {
-   "__version__": {
-     "sentence_transformers": "3.4.1",
-     "transformers": "4.51.3",
-     "pytorch": "2.7.0+cu126"
-   },
-   "prompts": {},
-   "default_prompt_name": null,
-   "similarity_fn_name": "cosine"
- }
 
checkpoint-1200/model.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:2a16798609ad3be64f1c33cafbc6d8595006225a97722265fbba67e2dfaf916a
- size 470637416
 
checkpoint-1200/modules.json DELETED
@@ -1,14 +0,0 @@
- [
-   {
-     "idx": 0,
-     "name": "0",
-     "path": "",
-     "type": "sentence_transformers.models.Transformer"
-   },
-   {
-     "idx": 1,
-     "name": "1",
-     "path": "1_Pooling",
-     "type": "sentence_transformers.models.Pooling"
-   }
- ]
 
checkpoint-1200/optimizer.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:6e6c0d2369b6fe2e14855dd6ee01f523f4a5901a968149aedc664f3defacc964
- size 1715019
 
checkpoint-1200/rng_state.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:51a5e12d95d9820ac91d074df8188d98ce5f4fc76cb3ec8a63d860d96a200697
- size 14645
 
checkpoint-1200/scheduler.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:efdbea4967733a900bbe36cb7fab0e417825ab1560e9b509550180a0f55ecc51
- size 1465
 
checkpoint-1200/sentence_bert_config.json DELETED
@@ -1,4 +0,0 @@
- {
-   "max_seq_length": 128,
-   "do_lower_case": false
- }
 
checkpoint-1200/special_tokens_map.json DELETED
@@ -1,51 +0,0 @@
- {
-   "bos_token": {
-     "content": "<s>",
-     "lstrip": false,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false
-   },
-   "cls_token": {
-     "content": "<s>",
-     "lstrip": false,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false
-   },
-   "eos_token": {
-     "content": "</s>",
-     "lstrip": false,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false
-   },
-   "mask_token": {
-     "content": "<mask>",
-     "lstrip": true,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false
-   },
-   "pad_token": {
-     "content": "<pad>",
-     "lstrip": false,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false
-   },
-   "sep_token": {
-     "content": "</s>",
-     "lstrip": false,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false
-   },
-   "unk_token": {
-     "content": "<unk>",
-     "lstrip": false,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false
-   }
- }
 
checkpoint-1200/tokenizer.json DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:cad551d5600a84242d0973327029452a1e3672ba6313c2a3c3d69c4310e12719
- size 17082987
 
checkpoint-1200/tokenizer_config.json DELETED
@@ -1,65 +0,0 @@
- {
-   "added_tokens_decoder": {
-     "0": {
-       "content": "<s>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "1": {
-       "content": "<pad>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "2": {
-       "content": "</s>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "3": {
-       "content": "<unk>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "250001": {
-       "content": "<mask>",
-       "lstrip": true,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     }
-   },
-   "bos_token": "<s>",
-   "clean_up_tokenization_spaces": false,
-   "cls_token": "<s>",
-   "do_lower_case": true,
-   "eos_token": "</s>",
-   "extra_special_tokens": {},
-   "mask_token": "<mask>",
-   "max_length": 128,
-   "model_max_length": 128,
-   "pad_to_multiple_of": null,
-   "pad_token": "<pad>",
-   "pad_token_type_id": 0,
-   "padding_side": "right",
-   "sep_token": "</s>",
-   "stride": 0,
-   "strip_accents": null,
-   "tokenize_chinese_chars": true,
-   "tokenizer_class": "BertTokenizer",
-   "truncation_side": "right",
-   "truncation_strategy": "longest_first",
-   "unk_token": "<unk>"
- }
 
checkpoint-1200/trainer_state.json DELETED
@@ -1,249 +0,0 @@
- {
-   "best_global_step": 1200,
-   "best_metric": 0.0017435119953006506,
-   "best_model_checkpoint": "data/fine-tuned-sbert-sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2-original-adafactor/checkpoint-1200",
-   "epoch": 2.255279211637729,
-   "eval_steps": 100,
-   "global_step": 1200,
-   "is_hyper_param_search": false,
-   "is_local_process_zero": true,
-   "is_world_process_zero": true,
-   "log_history": [
-     {
-       "epoch": 0.18770530267480057,
-       "eval_loss": 0.012530049309134483,
-       "eval_runtime": 812.6802,
-       "eval_samples_per_second": 3277.151,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.8778235859541618,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7128396034240723,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.8848748516159781,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.812583495899967,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.6880456209182739,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.7185793630359445,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.7900823930955021,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.8364038065429271,
-       "eval_steps_per_second": 3.278,
-       "step": 100
-     },
-     {
-       "epoch": 0.37541060534960113,
-       "eval_loss": 0.009013425558805466,
-       "eval_runtime": 792.9843,
-       "eval_samples_per_second": 3358.548,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9164113424048541,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7378441095352173,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9368603114664952,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.8729798695775446,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7272344827651978,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.8103205315460159,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.8605654745268148,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.8857576838544123,
-       "eval_steps_per_second": 3.359,
-       "step": 200
-     },
-     {
-       "epoch": 0.5631159080244017,
-       "eval_loss": 0.006819029338657856,
-       "eval_runtime": 809.9704,
-       "eval_samples_per_second": 3288.115,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9398298338890391,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7449667453765869,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9629957356284182,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9088032597499417,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7449667453765869,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.864029341509194,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.8990159430733201,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9188060251084542,
-       "eval_steps_per_second": 3.289,
-       "step": 300
-     },
-     {
-       "epoch": 0.7508212106992023,
-       "eval_loss": 0.005150709766894579,
-       "eval_runtime": 797.9199,
-       "eval_samples_per_second": 3337.773,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9560016220600163,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7553268671035767,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9774059659768239,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9333702119012406,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7449506521224976,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9005457325671423,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.916037892637527,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9513710688929036,
-       "eval_steps_per_second": 3.339,
-       "step": 400
-     },
-     {
-       "epoch": 0.9385265133740028,
-       "grad_norm": 0.17396493256092072,
-       "learning_rate": 2.9428198433420364e-05,
-       "loss": 0.0409,
-       "step": 500
-     },
-     {
-       "epoch": 0.9385265133740028,
-       "eval_loss": 0.003973629325628281,
-       "eval_runtime": 809.4532,
-       "eval_samples_per_second": 3290.216,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9655950557207654,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7622435092926025,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9845099503823473,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9477742208778024,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7535413503646851,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9221773981286795,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9367750202319935,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9590347859107281,
-       "eval_steps_per_second": 3.291,
-       "step": 500
-     },
-     {
-       "epoch": 1.1276396058188645,
-       "eval_loss": 0.0032712339889258146,
-       "eval_runtime": 793.7573,
-       "eval_samples_per_second": 3355.277,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9712722657775374,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7610360383987427,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9887055977101925,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9564087809158087,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7610177993774414,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9350876149915242,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9471753898932449,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9658239646502422,
-       "eval_steps_per_second": 3.356,
-       "step": 600
-     },
-     {
-       "epoch": 1.3153449084936648,
-       "eval_loss": 0.0028166945558041334,
-       "eval_runtime": 815.1943,
-       "eval_samples_per_second": 3267.044,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9751246583160614,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7577522993087769,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9911117019106511,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9621558129059113,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7424367666244507,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.943665667488554,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9536134909690983,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9708525597505264,
-       "eval_steps_per_second": 3.268,
-       "step": 700
-     },
-     {
-       "epoch": 1.5030502111684654,
-       "eval_loss": 0.0026242006570100784,
-       "eval_runtime": 805.7115,
-       "eval_samples_per_second": 3305.496,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9782673995974888,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7254683971405029,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9927214598054878,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9669240257663667,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7145971059799194,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9507846488068235,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9597660102710608,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9741896137072368,
-       "eval_steps_per_second": 3.306,
-       "step": 800
-     },
-     {
-       "epoch": 1.690755513843266,
-       "eval_loss": 0.002248650649562478,
-       "eval_runtime": 818.5338,
-       "eval_samples_per_second": 3253.715,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9801973506353069,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7349117994308472,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9938133122786723,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9698356230196407,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7348856329917908,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9551340483533577,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9641228578901284,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9756164919507957,
-       "eval_steps_per_second": 3.255,
-       "step": 900
-     },
-     {
-       "epoch": 1.8784608165180665,
-       "grad_norm": 0.07541557401418686,
-       "learning_rate": 2.5511749347258486e-05,
-       "loss": 0.0131,
-       "step": 1000
-     },
-     {
-       "epoch": 1.8784608165180665,
-       "eval_loss": 0.002240537665784359,
-       "eval_runtime": 803.6286,
-       "eval_samples_per_second": 3314.063,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9817931272716349,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7197962999343872,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9944127523785896,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9722373310278887,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7091608047485352,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9587183163648803,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9675121928984912,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9770088489465266,
-       "eval_steps_per_second": 3.315,
-       "step": 1000
-     },
-     {
-       "epoch": 2.0675739089629284,
-       "eval_loss": 0.0018734760815277696,
-       "eval_runtime": 807.0812,
-       "eval_samples_per_second": 3299.886,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9828594815415578,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.7552986741065979,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9950493119597241,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.973889221813201,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.7401974201202393,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.9611601510291333,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9661201195760486,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9817842882294052,
-       "eval_steps_per_second": 3.301,
-       "step": 1100
-     },
-     {
-       "epoch": 2.255279211637729,
-       "eval_loss": 0.0017435119953006506,
-       "eval_runtime": 802.6162,
-       "eval_samples_per_second": 3318.244,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy": 0.9843050674356433,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_accuracy_threshold": 0.742120623588562,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_ap": 0.9955554741842152,
-       "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9760932477723254,
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1": 0.9760932477723254,
211
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_f1_threshold": 0.742120623588562,
212
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_mcc": 0.964449493634366,
213
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_precision": 0.9703216856372878,
214
- "eval_sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2_cosine_recall": 0.9819338803033267,
215
- "eval_steps_per_second": 3.319,
216
- "step": 1200
217
- }
218
- ],
219
- "logging_steps": 500,
220
- "max_steps": 4256,
221
- "num_input_tokens_seen": 0,
222
- "num_train_epochs": 8,
223
- "save_steps": 100,
224
- "stateful_callbacks": {
225
- "EarlyStoppingCallback": {
226
- "args": {
227
- "early_stopping_patience": 1,
228
- "early_stopping_threshold": 0.0
229
- },
230
- "attributes": {
231
- "early_stopping_patience_counter": 0
232
- }
233
- },
234
- "TrainerControl": {
235
- "args": {
236
- "should_epoch_stop": false,
237
- "should_evaluate": false,
238
- "should_log": false,
239
- "should_save": true,
240
- "should_training_stop": false
241
- },
242
- "attributes": {}
243
- }
244
- },
245
- "total_flos": 0.0,
246
- "train_batch_size": 1000,
247
- "trial_name": null,
248
- "trial_params": null
249
- }
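The deleted trainer_state.json above records per-checkpoint eval metrics whose key names (`eval_<name>_cosine_accuracy`, `_cosine_ap`, `_cosine_f1`, `_cosine_mcc`, ...) match what sentence-transformers' `BinaryClassificationEvaluator` emits, plus an `EarlyStoppingCallback` with patience 1 and threshold 0.0. Below is a minimal sketch of how such metrics are typically produced; the base model is inferred from the evaluator name, and the pair data is hypothetical, not from this repo.

```python
# Minimal sketch (assumed setup, not this repo's actual training script).
from sentence_transformers import SentenceTransformer
from sentence_transformers.evaluation import BinaryClassificationEvaluator
from transformers import EarlyStoppingCallback

# Base model inferred from the evaluator name in the deleted metrics.
model = SentenceTransformer(
    "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
)

# Hypothetical labeled pairs (1 = match, 0 = non-match).
sentences1 = ["Acme Corp.", "Jane Q. Smith"]
sentences2 = ["ACME Corporation", "John Doe"]
labels = [1, 0]

# The evaluator's name becomes part of each metric key; the Trainer then
# prefixes "eval_", yielding e.g. "eval_<name>_cosine_accuracy".
evaluator = BinaryClassificationEvaluator(
    sentences1,
    sentences2,
    labels,
    name="sentence-transformers-paraphrase-multilingual-MiniLM-L12-v2",
)
print(evaluator(model))

# Matches the deleted state's EarlyStoppingCallback args: stop after a
# single evaluation with no improvement at all (threshold 0.0).
early_stopping = EarlyStoppingCallback(
    early_stopping_patience=1, early_stopping_threshold=0.0
)
```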
checkpoint-1200/training_args.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:9339753774865faea550d7da93688221ca0f43171c16e3034645a2149992c8a6
- size 6033
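training_args.bin and unigram.json were stored via Git LFS, so the diff shows only the three-line pointer file (spec version, sha256 oid, byte size) rather than the binary contents. A small sketch, assuming hypothetical local file paths, of checking a downloaded blob against its pointer:

```python
# Sketch only; "training_args.bin.pointer" / "training_args.bin" are
# hypothetical local paths, not files tracked in this repo.
import hashlib


def parse_lfs_pointer(path: str) -> dict:
    """Parse a Git LFS pointer file into its version/oid/size fields."""
    fields = {}
    with open(path, "r") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields


def sha256_of(path: str) -> str:
    """Stream a file through sha256 so large blobs don't load into memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()


pointer = parse_lfs_pointer("training_args.bin.pointer")
assert pointer["oid"] == "sha256:" + sha256_of("training_args.bin")
```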
 
 
 
 
checkpoint-1200/unigram.json DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:da145b5e7700ae40f16691ec32a0b1fdc1ee3298db22a31ea55f57a966c4a65d
- size 14763260
 
 
 
 
checkpoint-1300/1_Pooling/config.json DELETED
@@ -1,10 +0,0 @@
- {
- "word_embedding_dimension": 384,
- "pooling_mode_cls_token": false,
- "pooling_mode_mean_tokens": true,
- "pooling_mode_max_tokens": false,
- "pooling_mode_mean_sqrt_len_tokens": false,
- "pooling_mode_weightedmean_tokens": false,
- "pooling_mode_lasttoken": false,
- "include_prompt": true
- }
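This deleted pooling config selects mean pooling (`pooling_mode_mean_tokens: true`) over 384-dimensional token embeddings, with every other mode disabled. A sketch of the equivalent masked mean in plain PyTorch, followed by the corresponding sentence-transformers module; the tensor shapes are illustrative:

```python
# Sketch of the operation the deleted 1_Pooling/config.json encodes:
# attention-masked mean over token embeddings.
import torch


def mean_pool(token_embeddings: torch.Tensor,
              attention_mask: torch.Tensor) -> torch.Tensor:
    """token_embeddings: (batch, seq, 384); attention_mask: (batch, seq)."""
    mask = attention_mask.unsqueeze(-1).float()    # (batch, seq, 1)
    summed = (token_embeddings * mask).sum(dim=1)  # (batch, 384)
    counts = mask.sum(dim=1).clamp(min=1e-9)       # (batch, 1)
    return summed / counts


# The same choice expressed as a sentence-transformers module:
from sentence_transformers.models import Pooling

pooling = Pooling(word_embedding_dimension=384, pooling_mode="mean")
```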