Training in progress, step 51000, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1311,6 +1311,10 @@ You can finetune this model on your own dataset.
|
|
1311 |
| 0.8959 | 50700 | 0.2947 |
|
1312 |
| 0.8968 | 50750 | 0.3013 |
|
1313 |
| 0.8977 | 50800 | 0.3845 |
|
|
|
|
|
|
|
|
|
1314 |
|
1315 |
</details>
|
1316 |
|
|
|
1311 |
| 0.8959 | 50700 | 0.2947 |
|
1312 |
| 0.8968 | 50750 | 0.3013 |
|
1313 |
| 0.8977 | 50800 | 0.3845 |
|
1314 |
+
| 0.8986 | 50850 | 0.2882 |
|
1315 |
+
| 0.8994 | 50900 | 0.3639 |
|
1316 |
+
| 0.9003 | 50950 | 0.2332 |
|
1317 |
+
| 0.9012 | 51000 | 0.3363 |
|
1318 |
|
1319 |
</details>
|
1320 |
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e5b296e1f58fde06ca25a671f7290066af40d31d74f593dd71beb56e81f618e
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64cec233f06e1ea5350dc36447e7fd4afa810289f7a83dc18968cd19ae207a12
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c2b900393780cdd3cbb99d501f19a790b00c5891d77d31ffa5ebe767f48b02bb
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:30c99faa590c236be1e60450ac31ef6bbc0d8a5408817f47526e33c981695098
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2640c4dcf9a6fa010a98a2ceb775abaf0e70fd53baf808dd65285ab30eadea3f
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -7120,6 +7120,34 @@
|
|
7120 |
"learning_rate": 5.704777051304706e-06,
|
7121 |
"loss": 0.3845,
|
7122 |
"step": 50800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7123 |
}
|
7124 |
],
|
7125 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.901203371560849,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 51000,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
7120 |
"learning_rate": 5.704777051304706e-06,
|
7121 |
"loss": 0.3845,
|
7122 |
"step": 50800
|
7123 |
+
},
|
7124 |
+
{
|
7125 |
+
"epoch": 0.8985527734091994,
|
7126 |
+
"grad_norm": 1.9302955865859985,
|
7127 |
+
"learning_rate": 5.65569103296617e-06,
|
7128 |
+
"loss": 0.2882,
|
7129 |
+
"step": 50850
|
7130 |
+
},
|
7131 |
+
{
|
7132 |
+
"epoch": 0.8994363061264159,
|
7133 |
+
"grad_norm": 2.9837305545806885,
|
7134 |
+
"learning_rate": 5.606605014627634e-06,
|
7135 |
+
"loss": 0.3639,
|
7136 |
+
"step": 50900
|
7137 |
+
},
|
7138 |
+
{
|
7139 |
+
"epoch": 0.9003198388436324,
|
7140 |
+
"grad_norm": 1.3305821418762207,
|
7141 |
+
"learning_rate": 5.557518996289098e-06,
|
7142 |
+
"loss": 0.2332,
|
7143 |
+
"step": 50950
|
7144 |
+
},
|
7145 |
+
{
|
7146 |
+
"epoch": 0.901203371560849,
|
7147 |
+
"grad_norm": 1.2136187553405762,
|
7148 |
+
"learning_rate": 5.508432977950561e-06,
|
7149 |
+
"loss": 0.3363,
|
7150 |
+
"step": 51000
|
7151 |
}
|
7152 |
],
|
7153 |
"logging_steps": 50,
|