Training in progress, step 2400, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -286,9 +286,9 @@ print(embeddings.shape)
 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
-# tensor([[1.0000, 0.6084, 0.1042],
-#         [0.6084, 1.0000, 0.1457],
-#         [0.1042, 0.1457, 1.0000]])
 ```
 <!--
@@ -1268,6 +1268,10 @@ You can finetune this model on your own dataset.
 | 0.0371 | 2100 | 0.5966        |
 | 0.0380 | 2150 | 0.5989        |
 | 0.0389 | 2200 | 0.5097        |
 ### Framework Versions

 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
+# tensor([[1.0000, 0.5939, 0.0989],
+#         [0.5939, 1.0000, 0.1429],
+#         [0.0989, 0.1429, 1.0000]])
 ```
 <!--
 | 0.0371 | 2100 | 0.5966        |
 | 0.0380 | 2150 | 0.5989        |
 | 0.0389 | 2200 | 0.5097        |
+| 0.0398 | 2250 | 0.4934        |
+| 0.0406 | 2300 | 0.5354        |
+| 0.0415 | 2350 | 0.5236        |
+| 0.0424 | 2400 | 0.5057        |
 ### Framework Versions

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43e276da028dbf8b1ad5144673c6ce3925a2b3aabd87a44547c55a6459e20d87
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:0c241b1c1e2bc79f90ea3e691425a4d078b73a1355962e0a546f6d5b25945da0
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:93cc133162fde43973a8ccb2ec2dfc87437cbd187213b067665594adaa028144
 size 180609611

 version https://git-lfs.github.com/spec/v1
+oid sha256:64ba4f5c69a3d02a5a6823d34b8da6ff44cacb16550bdd25ab1d7402501d6357
 size 180609611

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e3c956e4b6b1160a43d807052e2f2f064099f54437c58331d632ad556682176f
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:1d318191f63943626d0af0c2a2dffd47f5f97e52b9ffb5b4186681760d24fbc1
 size 14645

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1f6829e47549862e022b4eedf087cf81394f448150e958ad6663988076bf8691
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:7da7c5085795b13d2bf0030671cbddb9f62ae43221bf1424a3830d4cf8c19012
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fdfc272a547d5b2f74b766d251fe9186a84058fe1c0528c0560d11ed37d05aa5
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:f85f103e0fd23f9c136b70ea64be4bf637404426959df56180c10126efe39d0a
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.038875439557526814,
   "eval_steps": 500,
-  "global_step": 2200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -316,6 +316,34 @@
       "learning_rate": 1.9425795053003533e-05,
       "loss": 0.5097,
       "step": 2200
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.04240957042639289,
   "eval_steps": 500,
+  "global_step": 2400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.9425795053003533e-05,
       "loss": 0.5097,
       "step": 2200
+    },
+    {
+      "epoch": 0.039758972274743336,
+      "grad_norm": 2.274600028991699,
+      "learning_rate": 1.986749116607774e-05,
+      "loss": 0.4934,
+      "step": 2250
+    },
+    {
+      "epoch": 0.04064250499195985,
+      "grad_norm": 2.393251895904541,
+      "learning_rate": 2.0309187279151945e-05,
+      "loss": 0.5354,
+      "step": 2300
+    },
+    {
+      "epoch": 0.04152603770917637,
+      "grad_norm": 2.613900899887085,
+      "learning_rate": 2.075088339222615e-05,
+      "loss": 0.5236,
+      "step": 2350
+    },
+    {
+      "epoch": 0.04240957042639289,
+      "grad_norm": 2.233302116394043,
+      "learning_rate": 2.1192579505300354e-05,
+      "loss": 0.5057,
+      "step": 2400
     }
   ],
   "logging_steps": 50,