Training in progress, step 8400, checkpoint
- last-checkpoint/1_Pooling/config.json +8 -8
- last-checkpoint/README.md +43 -130
- last-checkpoint/config.json +1 -1
- last-checkpoint/config_sentence_transformers.json +4 -8
- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +2 -2
- last-checkpoint/rng_state.pth +2 -2
- last-checkpoint/scaler.pt +2 -2
- last-checkpoint/scheduler.pt +2 -2
- last-checkpoint/sentence_bert_config.json +2 -2
- last-checkpoint/trainer_state.json +30 -2
- last-checkpoint/training_args.bin +2 -2
last-checkpoint/1_Pooling/config.json
CHANGED
@@ -1,10 +1,10 @@
 {
-
-
-
-
-
-
-
-
+  "word_embedding_dimension": 384,
+  "pooling_mode_cls_token": false,
+  "pooling_mode_mean_tokens": true,
+  "pooling_mode_max_tokens": false,
+  "pooling_mode_mean_sqrt_len_tokens": false,
+  "pooling_mode_weightedmean_tokens": false,
+  "pooling_mode_lasttoken": false,
+  "include_prompt": true
 }
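The updated pooling config specifies mean pooling over 384-dimensional token embeddings. As a minimal sketch (assuming the sentence-transformers library; nothing below is taken from the training code in this commit), this is roughly how such a config maps onto a Pooling module:

```python
from sentence_transformers import models

# Build a Pooling module with the same settings as last-checkpoint/1_Pooling/config.json:
# mean pooling over 384-dimensional token embeddings, all other pooling modes disabled.
pooling = models.Pooling(
    word_embedding_dimension=384,
    pooling_mode_cls_token=False,
    pooling_mode_mean_tokens=True,
    pooling_mode_max_tokens=False,
)

print(pooling.get_pooling_mode_str())  # expected: "mean"
```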
last-checkpoint/README.md
CHANGED
@@ -3,7 +3,6 @@ tags:
 - sentence-transformers
 - sentence-similarity
 - feature-extraction
-- dense
 - generated_from_trainer
 - dataset_size:14483470
 - loss:MultipleNegativesRankingLoss
@@ -251,7 +250,7 @@ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [s
 
 ```
 SentenceTransformer(
-  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False
+  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel
   (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
   (2): Normalize()
 )
@@ -285,10 +284,8 @@ print(embeddings.shape)
 
 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
-print(similarities)
-#
-# [0.7055, 1.0000, 0.1624],
-# [0.1480, 0.1624, 1.0000]])
+print(similarities.shape)
+# [3, 3]
 ```
 
 <!--
@@ -352,8 +349,7 @@ You can finetune this model on your own dataset.
 ```json
 {
     "scale": 20.0,
-    "similarity_fct": "cos_sim"
-    "gather_across_devices": false
+    "similarity_fct": "cos_sim"
 }
 ```
 </details>
@@ -379,8 +375,7 @@ You can finetune this model on your own dataset.
 ```json
 {
     "scale": 20.0,
-    "similarity_fct": "cos_sim"
-    "gather_across_devices": false
+    "similarity_fct": "cos_sim"
 }
 ```
 </details>
@@ -406,8 +401,7 @@ You can finetune this model on your own dataset.
 ```json
 {
     "scale": 20.0,
-    "similarity_fct": "cos_sim"
-    "gather_across_devices": false
+    "similarity_fct": "cos_sim"
 }
 ```
 </details>
@@ -433,8 +427,7 @@ You can finetune this model on your own dataset.
 ```json
 {
     "scale": 20.0,
-    "similarity_fct": "cos_sim"
-    "gather_across_devices": false
+    "similarity_fct": "cos_sim"
 }
 ```
 </details>
@@ -460,8 +453,7 @@ You can finetune this model on your own dataset.
 ```json
 {
     "scale": 20.0,
-    "similarity_fct": "cos_sim"
-    "gather_across_devices": false
+    "similarity_fct": "cos_sim"
 }
 ```
 </details>
@@ -487,8 +479,7 @@ You can finetune this model on your own dataset.
 ```json
 {
     "scale": 20.0,
-    "similarity_fct": "cos_sim"
-    "gather_across_devices": false
+    "similarity_fct": "cos_sim"
 }
 ```
 </details>
@@ -514,8 +505,7 @@ You can finetune this model on your own dataset.
 ```json
 {
     "scale": 20.0,
-    "similarity_fct": "cos_sim"
-    "gather_across_devices": false
+    "similarity_fct": "cos_sim"
 }
 ```
 </details>
@@ -541,8 +531,7 @@ You can finetune this model on your own dataset.
 ```json
 {
     "scale": 20.0,
-    "similarity_fct": "cos_sim"
-    "gather_across_devices": false
+    "similarity_fct": "cos_sim"
 }
 ```
 </details>
@@ -568,8 +557,7 @@ You can finetune this model on your own dataset.
 ```json
 {
     "scale": 20.0,
-    "similarity_fct": "cos_sim"
-    "gather_across_devices": false
+    "similarity_fct": "cos_sim"
 }
 ```
 </details>
@@ -595,8 +583,7 @@ You can finetune this model on your own dataset.
 ```json
 {
     "scale": 20.0,
-    "similarity_fct": "cos_sim"
-    "gather_across_devices": false
+    "similarity_fct": "cos_sim"
 }
 ```
 </details>
@@ -622,8 +609,7 @@ You can finetune this model on your own dataset.
 ```json
 {
     "scale": 20.0,
-    "similarity_fct": "cos_sim"
-    "gather_across_devices": false
+    "similarity_fct": "cos_sim"
 }
 ```
 </details>
@@ -649,8 +635,7 @@ You can finetune this model on your own dataset.
 ```json
 {
     "scale": 20.0,
-    "similarity_fct": "cos_sim"
-    "gather_across_devices": false
+    "similarity_fct": "cos_sim"
 }
 ```
 </details>
@@ -676,8 +661,7 @@ You can finetune this model on your own dataset.
 ```json
 {
     "scale": 20.0,
-    "similarity_fct": "cos_sim"
-    "gather_across_devices": false
+    "similarity_fct": "cos_sim"
 }
 ```
 </details>
@@ -703,8 +687,7 @@ You can finetune this model on your own dataset.
 ```json
 {
     "scale": 20.0,
-    "similarity_fct": "cos_sim"
-    "gather_across_devices": false
+    "similarity_fct": "cos_sim"
 }
 ```
 </details>
@@ -730,8 +713,7 @@ You can finetune this model on your own dataset.
 ```json
 {
     "scale": 20.0,
-    "similarity_fct": "cos_sim"
-    "gather_across_devices": false
+    "similarity_fct": "cos_sim"
 }
 ```
 </details>
@@ -757,8 +739,7 @@ You can finetune this model on your own dataset.
 ```json
 {
     "scale": 20.0,
-    "similarity_fct": "cos_sim"
-    "gather_across_devices": false
+    "similarity_fct": "cos_sim"
 }
 ```
 </details>
@@ -784,8 +765,7 @@ You can finetune this model on your own dataset.
 ```json
 {
     "scale": 20.0,
-    "similarity_fct": "cos_sim"
-    "gather_across_devices": false
+    "similarity_fct": "cos_sim"
 }
 ```
 </details>
@@ -811,8 +791,7 @@ You can finetune this model on your own dataset.
 ```json
 {
     "scale": 20.0,
-    "similarity_fct": "cos_sim"
-    "gather_across_devices": false
+    "similarity_fct": "cos_sim"
 }
 ```
 </details>
@@ -838,8 +817,7 @@ You can finetune this model on your own dataset.
 ```json
 {
     "scale": 20.0,
-    "similarity_fct": "cos_sim"
-    "gather_across_devices": false
+    "similarity_fct": "cos_sim"
 }
 ```
 </details>
@@ -865,8 +843,7 @@ You can finetune this model on your own dataset.
 ```json
 {
     "scale": 20.0,
-    "similarity_fct": "cos_sim"
-    "gather_across_devices": false
+    "similarity_fct": "cos_sim"
 }
 ```
 </details>
@@ -892,8 +869,7 @@ You can finetune this model on your own dataset.
 ```json
 {
     "scale": 20.0,
-    "similarity_fct": "cos_sim"
-    "gather_across_devices": false
+    "similarity_fct": "cos_sim"
 }
 ```
 </details>
@@ -919,8 +895,7 @@ You can finetune this model on your own dataset.
 ```json
 {
     "scale": 20.0,
-    "similarity_fct": "cos_sim"
-    "gather_across_devices": false
+    "similarity_fct": "cos_sim"
 }
 ```
 </details>
@@ -946,8 +921,7 @@ You can finetune this model on your own dataset.
 ```json
 {
     "scale": 20.0,
-    "similarity_fct": "cos_sim"
-    "gather_across_devices": false
+    "similarity_fct": "cos_sim"
 }
 ```
 </details>
@@ -973,8 +947,7 @@ You can finetune this model on your own dataset.
 ```json
 {
     "scale": 20.0,
-    "similarity_fct": "cos_sim"
-    "gather_across_devices": false
+    "similarity_fct": "cos_sim"
 }
 ```
 </details>
@@ -1000,8 +973,7 @@ You can finetune this model on your own dataset.
 ```json
 {
     "scale": 20.0,
-    "similarity_fct": "cos_sim"
-    "gather_across_devices": false
+    "similarity_fct": "cos_sim"
 }
 ```
 </details>
@@ -1027,8 +999,7 @@ You can finetune this model on your own dataset.
 ```json
 {
     "scale": 20.0,
-    "similarity_fct": "cos_sim"
-    "gather_across_devices": false
+    "similarity_fct": "cos_sim"
 }
 ```
 </details>
@@ -1054,8 +1025,7 @@ You can finetune this model on your own dataset.
 ```json
 {
     "scale": 20.0,
-    "similarity_fct": "cos_sim"
-    "gather_across_devices": false
+    "similarity_fct": "cos_sim"
 }
 ```
 </details>
@@ -1081,8 +1051,7 @@ You can finetune this model on your own dataset.
 ```json
 {
     "scale": 20.0,
-    "similarity_fct": "cos_sim"
-    "gather_across_devices": false
+    "similarity_fct": "cos_sim"
 }
 ```
 </details>
@@ -1167,7 +1136,7 @@ You can finetune this model on your own dataset.
 - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
 - `deepspeed`: None
 - `label_smoothing_factor`: 0.0
-- `optim`:
+- `optim`: adamw_torch
 - `optim_args`: None
 - `adafactor`: False
 - `group_by_length`: False
@@ -1185,7 +1154,6 @@ You can finetune this model on your own dataset.
 - `hub_strategy`: checkpoint
 - `hub_private_repo`: None
 - `hub_always_push`: False
-- `hub_revision`: None
 - `gradient_checkpointing`: False
 - `gradient_checkpointing_kwargs`: None
 - `include_inputs_for_metrics`: False
@@ -1210,86 +1178,31 @@ You can finetune this model on your own dataset.
 - `batch_eval_metrics`: False
 - `eval_on_start`: False
 - `use_liger_kernel`: False
-- `liger_kernel_config`: None
 - `eval_use_gather_object`: False
 - `average_tokens_across_devices`: False
 - `prompts`: None
 - `batch_sampler`: batch_sampler
 - `multi_dataset_batch_sampler`: proportional
-- `router_mapping`: {}
-- `learning_rate_mapping`: {}
 
 </details>
 
 ### Training Logs
 | Epoch | Step | Training Loss |
 |:------:|:----:|:-------------:|
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.0998 | 5650 | 0.411 |
-| 0.1007 | 5700 | 0.4184 |
-| 0.1016 | 5750 | 0.4071 |
-| 0.1025 | 5800 | 0.4712 |
-| 0.1034 | 5850 | 0.4912 |
-| 0.1043 | 5900 | 0.5589 |
-| 0.1051 | 5950 | 0.4507 |
-| 0.1060 | 6000 | 0.5429 |
-| 0.1069 | 6050 | 0.3789 |
-| 0.1078 | 6100 | 0.3949 |
-| 0.1087 | 6150 | 0.4491 |
-| 0.1096 | 6200 | 0.435 |
-| 0.1104 | 6250 | 0.3865 |
-| 0.1113 | 6300 | 0.4175 |
-| 0.1122 | 6350 | 0.4387 |
-| 0.1131 | 6400 | 0.4554 |
-| 0.1140 | 6450 | 0.581 |
-| 0.1149 | 6500 | 0.4746 |
-| 0.1157 | 6550 | 0.4511 |
-| 0.1166 | 6600 | 0.3871 |
-| 0.1175 | 6650 | 0.449 |
-| 0.1184 | 6700 | 0.3458 |
-| 0.1193 | 6750 | 0.4791 |
-| 0.1202 | 6800 | 0.4445 |
-| 0.1210 | 6850 | 0.4907 |
-| 0.1219 | 6900 | 0.5377 |
-| 0.1228 | 6950 | 0.5275 |
-| 0.1237 | 7000 | 0.5489 |
-| 0.1246 | 7050 | 0.3931 |
-| 0.1255 | 7100 | 0.5155 |
-| 0.1263 | 7150 | 0.4282 |
-| 0.1272 | 7200 | 0.4639 |
-| 0.1281 | 7250 | 0.5294 |
-| 0.1290 | 7300 | 0.4121 |
-| 0.1299 | 7350 | 0.4139 |
-| 0.1308 | 7400 | 0.4311 |
-| 0.1316 | 7450 | 0.4025 |
-| 0.1325 | 7500 | 0.4791 |
-| 0.1334 | 7550 | 0.4498 |
-| 0.1343 | 7600 | 0.4538 |
-| 0.1352 | 7650 | 0.4596 |
-| 0.1361 | 7700 | 0.3508 |
-| 0.1369 | 7750 | 0.5145 |
-| 0.1378 | 7800 | 0.3505 |
-| 0.1387 | 7850 | 0.3354 |
-| 0.1396 | 7900 | 0.4474 |
-| 0.1405 | 7950 | 0.3524 |
-| 0.1414 | 8000 | 0.4957 |
-| 0.1422 | 8050 | 0.4461 |
-| 0.1431 | 8100 | 0.4983 |
-| 0.1440 | 8150 | 0.4535 |
-| 0.1449 | 8200 | 0.3668 |
+| 0.1458 | 8250 | 0.4688 |
+| 0.1467 | 8300 | 0.3967 |
+| 0.1475 | 8350 | 0.4911 |
+| 0.1484 | 8400 | 0.4076 |
 
 
 ### Framework Versions
-- Python: 3.
-- Sentence Transformers:
-- Transformers: 4.
-- PyTorch: 2.
-- Accelerate: 1.
-- Datasets:
-- Tokenizers: 0.21.
+- Python: 3.11.13
+- Sentence Transformers: 4.1.0
+- Transformers: 4.52.4
+- PyTorch: 2.6.0+cu124
+- Accelerate: 1.8.1
+- Datasets: 3.6.0
+- Tokenizers: 0.21.2
 
 ## Citation
 
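For context, the usage snippet the README now documents reduces to the following sketch (the checkpoint path and the example sentences are placeholders, not taken from this commit):

```python
from sentence_transformers import SentenceTransformer

# Load the checkpoint from a local path (placeholder) and embed a few example sentences.
model = SentenceTransformer("last-checkpoint")

sentences = [
    "The weather is lovely today.",
    "It's so sunny outside!",
    "He drove to the stadium.",
]
embeddings = model.encode(sentences)
print(embeddings.shape)  # (3, 384) for this 384-dimensional model

# Get the similarity scores for the embeddings, as in the README
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)  # [3, 3]
```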
last-checkpoint/config.json
CHANGED
@@ -18,7 +18,7 @@
   "pad_token_id": 0,
   "position_embedding_type": "absolute",
   "torch_dtype": "float32",
-  "transformers_version": "4.
+  "transformers_version": "4.52.4",
   "type_vocab_size": 2,
   "use_cache": true,
   "vocab_size": 30522
last-checkpoint/config_sentence_transformers.json
CHANGED
@@ -1,14 +1,10 @@
 {
   "__version__": {
-    "sentence_transformers": "
-    "transformers": "4.
-    "pytorch": "2.
-  },
-  "model_type": "SentenceTransformer",
-  "prompts": {
-    "query": "",
-    "document": ""
+    "sentence_transformers": "4.1.0",
+    "transformers": "4.52.4",
+    "pytorch": "2.6.0+cu124"
   },
+  "prompts": {},
   "default_prompt_name": null,
   "similarity_fn_name": "cosine"
 }
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d4889319c3ce656b4822f1a50aac0284145a17b11d2f3ae21fabab4cf0583b0e
 size 90864192
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:ef06c105348a65b7eed64942f5dea6f43606f7f0ef5b2b0bde7930b99c26cac5
+size 180609210
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:b9d487123f633c1d3c5f62a2fbd2d4a814f21aa207d5180464abde74c25ff991
+size 14244
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:899a642947e70d7e60cf2eeee96b1e912dfb435fbf6e019478dc646f870906c8
+size 988
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:9d81eef3cd79f288260f908a6802f0ede97e49007282e07a31f47a48189a183b
+size 1064
last-checkpoint/sentence_bert_config.json
CHANGED
@@ -1,4 +1,4 @@
 {
-
-
+  "max_seq_length": 256,
+  "do_lower_case": false
 }
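sentence_bert_config.json records the settings the Transformer module is loaded with. A minimal sketch of how these two values are passed when building the module by hand (the base model name below is an illustrative stand-in, not the base model of this checkpoint):

```python
from sentence_transformers import models

# Illustrative only: "bert-base-uncased" stands in for whatever base model the checkpoint uses.
word_embedding_model = models.Transformer(
    "bert-base-uncased",
    max_seq_length=256,   # mirrors "max_seq_length": 256
    do_lower_case=False,  # mirrors "do_lower_case": false
)
print(word_embedding_model.max_seq_length)  # 256
```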
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.1484334964923751,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 8400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1156,6 +1156,34 @@
       "learning_rate": 4.750741198876913e-05,
       "loss": 0.3668,
       "step": 8200
+    },
+    {
+      "epoch": 0.14578289834072555,
+      "grad_norm": 1.9963476657867432,
+      "learning_rate": 4.7458325970430585e-05,
+      "loss": 0.4688,
+      "step": 8250
+    },
+    {
+      "epoch": 0.14666643105794208,
+      "grad_norm": 1.7402074337005615,
+      "learning_rate": 4.740923995209205e-05,
+      "loss": 0.3967,
+      "step": 8300
+    },
+    {
+      "epoch": 0.14754996377515858,
+      "grad_norm": 2.0074145793914795,
+      "learning_rate": 4.736015393375351e-05,
+      "loss": 0.4911,
+      "step": 8350
+    },
+    {
+      "epoch": 0.1484334964923751,
+      "grad_norm": 1.7804876565933228,
+      "learning_rate": 4.731106791541497e-05,
+      "loss": 0.4076,
+      "step": 8400
     }
   ],
   "logging_steps": 50,
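The four new log entries (steps 8250 through 8400) can be read back from the checkpoint directly; a small sketch, assuming the file sits at the path shown:

```python
import json

# Load the trainer state saved with this checkpoint and show the most recent log entries.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["global_step"], state["epoch"])  # 8400 0.1484334964923751
for entry in state["log_history"][-4:]:
    print(entry["step"], entry["loss"], entry["learning_rate"])
```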
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:764686e45491e3136468635b2d7d30b214689849603924e41f88b81ff0c3a1d0
+size 5560