Training in progress, step 44000, checkpoint

Files changed (7) hide show

last-checkpoint/README.md CHANGED Viewed

@@ -1395,6 +1395,10 @@ You can finetune this model on your own dataset.
 | 0.7722 | 43700 | 0.251         |
 | 0.7731 | 43750 | 0.3154        |
 | 0.7740 | 43800 | 0.3309        |
 </details>

 | 0.7722 | 43700 | 0.251         |
 | 0.7731 | 43750 | 0.3154        |
 | 0.7740 | 43800 | 0.3309        |
+| 0.7749 | 43850 | 0.2768        |
+| 0.7757 | 43900 | 0.3049        |
+| 0.7766 | 43950 | 0.2939        |
+| 0.7775 | 44000 | 0.2909        |
 </details>

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:87245a00c511204e0c66583191ab9429ad97c78538541227c616f90b8381119e
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:12bbe29ea94a2a3e248956e4ad4f48429053ec1581ede3091505ee45e42c209e
 size 90864192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8b6c1541af87dcd1797f9d736a3cac898e50e3ecafd501e98798ccab543ede07
 size 180609210

 version https://git-lfs.github.com/spec/v1
+oid sha256:fc95281118678bfa685a8fa9d0f9c2b4de3f7f9c764726219c4efe80cd17496a
 size 180609210

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:69c36e80b730b2d3f19367fe96dc275025a093a975c30683cdcf06771c2e520f
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:2248c4c2662777880f6435205956f86ee9cef13d01a64d9b6e6e97f9ff29c8a6
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0c2a170b686e6b3841063ec2a8f0cf18b4985f4986723acd35709abf15d5c19e
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:41c83ba4cca9cc14a64aa6c3e0597b81f5560663b8d07b1843663e365af550c1
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a1960f4f5d6f42011bfc954842f6c57ccfbbc8ac7380b9fbe5cdcbb8bd1b0029
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:947d6bd91fa66a1fa64426daf31057323ec244470e03f87d8bca1497c127e637
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7739746602816703,
   "eval_steps": 500,
-  "global_step": 43800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -6140,6 +6140,34 @@
       "learning_rate": 1.25738744575995e-05,
       "loss": 0.3309,
       "step": 43800
     }
   ],
   "logging_steps": 50,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.7775087911505363,
   "eval_steps": 500,
+  "global_step": 44000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.25738744575995e-05,
       "loss": 0.3309,
       "step": 43800
+    },
+    {
+      "epoch": 0.7748581929988867,
+      "grad_norm": 1.3661130666732788,
+      "learning_rate": 1.2524788439260962e-05,
+      "loss": 0.2768,
+      "step": 43850
+    },
+    {
+      "epoch": 0.7757417257161032,
+      "grad_norm": 1.3001888990402222,
+      "learning_rate": 1.2475702420922425e-05,
+      "loss": 0.3049,
+      "step": 43900
+    },
+    {
+      "epoch": 0.7766252584333198,
+      "grad_norm": 1.4377065896987915,
+      "learning_rate": 1.2426616402583888e-05,
+      "loss": 0.2939,
+      "step": 43950
+    },
+    {
+      "epoch": 0.7775087911505363,
+      "grad_norm": 2.084547519683838,
+      "learning_rate": 1.2377530384245351e-05,
+      "loss": 0.2909,
+      "step": 44000
     }
   ],
   "logging_steps": 50,