Training in progress, step 26400, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -1551,6 +1551,10 @@ You can finetune this model on your own dataset.
|
|
| 1551 |
| 0.4612 | 26100 | 0.2856 |
|
| 1552 |
| 0.4621 | 26150 | 0.3668 |
|
| 1553 |
| 0.4630 | 26200 | 0.4672 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1554 |
|
| 1555 |
</details>
|
| 1556 |
|
|
|
|
| 1551 |
| 0.4612 | 26100 | 0.2856 |
|
| 1552 |
| 0.4621 | 26150 | 0.3668 |
|
| 1553 |
| 0.4630 | 26200 | 0.4672 |
|
| 1554 |
+
| 0.4639 | 26250 | 0.373 |
|
| 1555 |
+
| 0.4647 | 26300 | 0.3232 |
|
| 1556 |
+
| 0.4656 | 26350 | 0.3461 |
|
| 1557 |
+
| 0.4665 | 26400 | 0.2937 |
|
| 1558 |
|
| 1559 |
</details>
|
| 1560 |
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:95db985db084a74db34108a18955cb2f94475e94d71ba1394dc6711c0773d4b7
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609210
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:99edc411ece53670a76daae12b850cb27d05b6b28f0e1abb1039884ae4ec4d91
|
| 3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c3c74149b9485dd85a90bddde2fb89338979661fdbddb3f61abad71930d1b347
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5aa35335d6835971914c8cd5a825891507e75318afb8a7d46177ecba5e7044a7
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d8741bd796701c3df080c6afd4a635df632277d4edf7423e34ebd4e6d1a562aa
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -3676,6 +3676,34 @@
|
|
| 3676 |
"learning_rate": 2.9844299149830163e-05,
|
| 3677 |
"loss": 0.4672,
|
| 3678 |
"step": 26200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3679 |
}
|
| 3680 |
],
|
| 3681 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.46650527469032177,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 26400,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 3676 |
"learning_rate": 2.9844299149830163e-05,
|
| 3677 |
"loss": 0.4672,
|
| 3678 |
"step": 26200
|
| 3679 |
+
},
|
| 3680 |
+
{
|
| 3681 |
+
"epoch": 0.46385467653867224,
|
| 3682 |
+
"grad_norm": 3.4169373512268066,
|
| 3683 |
+
"learning_rate": 2.9795213131491627e-05,
|
| 3684 |
+
"loss": 0.373,
|
| 3685 |
+
"step": 26250
|
| 3686 |
+
},
|
| 3687 |
+
{
|
| 3688 |
+
"epoch": 0.46473820925588877,
|
| 3689 |
+
"grad_norm": 1.5430257320404053,
|
| 3690 |
+
"learning_rate": 2.974612711315309e-05,
|
| 3691 |
+
"loss": 0.3232,
|
| 3692 |
+
"step": 26300
|
| 3693 |
+
},
|
| 3694 |
+
{
|
| 3695 |
+
"epoch": 0.46562174197310524,
|
| 3696 |
+
"grad_norm": 1.674177646636963,
|
| 3697 |
+
"learning_rate": 2.9697041094814553e-05,
|
| 3698 |
+
"loss": 0.3461,
|
| 3699 |
+
"step": 26350
|
| 3700 |
+
},
|
| 3701 |
+
{
|
| 3702 |
+
"epoch": 0.46650527469032177,
|
| 3703 |
+
"grad_norm": 1.7116457223892212,
|
| 3704 |
+
"learning_rate": 2.9647955076476018e-05,
|
| 3705 |
+
"loss": 0.2937,
|
| 3706 |
+
"step": 26400
|
| 3707 |
}
|
| 3708 |
],
|
| 3709 |
"logging_steps": 50,
|