Training in progress, step 50400, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1299,6 +1299,10 @@ You can finetune this model on your own dataset.
|
|
1299 |
| 0.8853 | 50100 | 0.3104 |
|
1300 |
| 0.8862 | 50150 | 0.368 |
|
1301 |
| 0.8871 | 50200 | 0.2848 |
|
|
|
|
|
|
|
|
|
1302 |
|
1303 |
</details>
|
1304 |
|
|
|
1299 |
| 0.8853 | 50100 | 0.3104 |
|
1300 |
| 0.8862 | 50150 | 0.368 |
|
1301 |
| 0.8871 | 50200 | 0.2848 |
|
1302 |
+
| 0.8880 | 50250 | 0.4137 |
|
1303 |
+
| 0.8888 | 50300 | 0.3597 |
|
1304 |
+
| 0.8897 | 50350 | 0.3246 |
|
1305 |
+
| 0.8906 | 50400 | 0.2658 |
|
1306 |
|
1307 |
</details>
|
1308 |
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4402d5595619f726b76f6c11fbb431abe89c0fdd64c6e52d57bdcebda5c57397
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:68b89f5d87aba4126dddf46cba6f5468fca0adb9ffb6a7c457ba09e31e0cf598
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9f76286cc6f7170c117d180053396109205046e24f113f39ebb7eba52374e471
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f630d12e96ea581a03c83786f1675e35621e66e28e380551b7eca37b86e7c85
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:56730b1eb2f09afcd359899fa053433fb9b0469fdf6908ef9d55d03c5ac4ee2e
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -7036,6 +7036,34 @@
|
|
7036 |
"learning_rate": 6.293809271367145e-06,
|
7037 |
"loss": 0.2848,
|
7038 |
"step": 50200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7039 |
}
|
7040 |
],
|
7041 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.8906009789542507,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 50400,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
7036 |
"learning_rate": 6.293809271367145e-06,
|
7037 |
"loss": 0.2848,
|
7038 |
"step": 50200
|
7039 |
+
},
|
7040 |
+
{
|
7041 |
+
"epoch": 0.8879503808026011,
|
7042 |
+
"grad_norm": 1.1558799743652344,
|
7043 |
+
"learning_rate": 6.244723253028607e-06,
|
7044 |
+
"loss": 0.4137,
|
7045 |
+
"step": 50250
|
7046 |
+
},
|
7047 |
+
{
|
7048 |
+
"epoch": 0.8888339135198177,
|
7049 |
+
"grad_norm": 1.2852174043655396,
|
7050 |
+
"learning_rate": 6.195637234690072e-06,
|
7051 |
+
"loss": 0.3597,
|
7052 |
+
"step": 50300
|
7053 |
+
},
|
7054 |
+
{
|
7055 |
+
"epoch": 0.8897174462370342,
|
7056 |
+
"grad_norm": 2.747140407562256,
|
7057 |
+
"learning_rate": 6.146551216351535e-06,
|
7058 |
+
"loss": 0.3246,
|
7059 |
+
"step": 50350
|
7060 |
+
},
|
7061 |
+
{
|
7062 |
+
"epoch": 0.8906009789542507,
|
7063 |
+
"grad_norm": 1.5731008052825928,
|
7064 |
+
"learning_rate": 6.097465198012998e-06,
|
7065 |
+
"loss": 0.2658,
|
7066 |
+
"step": 50400
|
7067 |
}
|
7068 |
],
|
7069 |
"logging_steps": 50,
|