Training in progress, step 50000, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1187,6 +1187,8 @@ You can finetune this model on your own dataset.
|
|
1187 |
</details>
|
1188 |
|
1189 |
### Training Logs
|
|
|
|
|
1190 |
| Epoch | Step | Training Loss |
|
1191 |
|:------:|:-----:|:-------------:|
|
1192 |
| 0.7925 | 44850 | 0.25 |
|
@@ -1289,7 +1291,12 @@ You can finetune this model on your own dataset.
|
|
1289 |
| 0.8782 | 49700 | 0.267 |
|
1290 |
| 0.8791 | 49750 | 0.2599 |
|
1291 |
| 0.8800 | 49800 | 0.3226 |
|
|
|
|
|
|
|
|
|
1292 |
|
|
|
1293 |
|
1294 |
### Framework Versions
|
1295 |
- Python: 3.11.13
|
|
|
1187 |
</details>
|
1188 |
|
1189 |
### Training Logs
|
1190 |
+
<details><summary>Click to expand</summary>
|
1191 |
+
|
1192 |
| Epoch | Step | Training Loss |
|
1193 |
|:------:|:-----:|:-------------:|
|
1194 |
| 0.7925 | 44850 | 0.25 |
|
|
|
1291 |
| 0.8782 | 49700 | 0.267 |
|
1292 |
| 0.8791 | 49750 | 0.2599 |
|
1293 |
| 0.8800 | 49800 | 0.3226 |
|
1294 |
+
| 0.8809 | 49850 | 0.3516 |
|
1295 |
+
| 0.8818 | 49900 | 0.2912 |
|
1296 |
+
| 0.8826 | 49950 | 0.2853 |
|
1297 |
+
| 0.8835 | 50000 | 0.3212 |
|
1298 |
|
1299 |
+
</details>
|
1300 |
|
1301 |
### Framework Versions
|
1302 |
- Python: 3.11.13
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f629f9c4d9b3dfe15298ca42cf535d9537f9d84496ead633516c3f55df659809
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:82a5e0de80f3319e9bcb41ff6fcaf27ef7f48f3c5947829a81867a4d0d22eced
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:39be9337bc1edfc554e8e83920537fc1f1ac1fb64502256befd2c768cb7272f2
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:05900a01ed21b7d3fe65d7b48e29dc049394fbc9a57676739fac72875cfe1dad
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f94f189d019244fe1bc7202740eb1c5db8cec176873eb287a31349f77949d54f
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -6980,6 +6980,34 @@
|
|
6980 |
"learning_rate": 6.686497418075435e-06,
|
6981 |
"loss": 0.3226,
|
6982 |
"step": 49800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6983 |
}
|
6984 |
],
|
6985 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.8835327172165185,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 50000,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
6980 |
"learning_rate": 6.686497418075435e-06,
|
6981 |
"loss": 0.3226,
|
6982 |
"step": 49800
|
6983 |
+
},
|
6984 |
+
{
|
6985 |
+
"epoch": 0.880882119064869,
|
6986 |
+
"grad_norm": 1.385986089706421,
|
6987 |
+
"learning_rate": 6.6374113997369e-06,
|
6988 |
+
"loss": 0.3516,
|
6989 |
+
"step": 49850
|
6990 |
+
},
|
6991 |
+
{
|
6992 |
+
"epoch": 0.8817656517820854,
|
6993 |
+
"grad_norm": 1.4890649318695068,
|
6994 |
+
"learning_rate": 6.588325381398362e-06,
|
6995 |
+
"loss": 0.2912,
|
6996 |
+
"step": 49900
|
6997 |
+
},
|
6998 |
+
{
|
6999 |
+
"epoch": 0.882649184499302,
|
7000 |
+
"grad_norm": 2.459829807281494,
|
7001 |
+
"learning_rate": 6.5392393630598265e-06,
|
7002 |
+
"loss": 0.2853,
|
7003 |
+
"step": 49950
|
7004 |
+
},
|
7005 |
+
{
|
7006 |
+
"epoch": 0.8835327172165185,
|
7007 |
+
"grad_norm": 1.6274219751358032,
|
7008 |
+
"learning_rate": 6.4901533447212895e-06,
|
7009 |
+
"loss": 0.3212,
|
7010 |
+
"step": 50000
|
7011 |
}
|
7012 |
],
|
7013 |
"logging_steps": 50,
|