Training in progress, step 9600, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1213,6 +1213,10 @@ You can finetune this model on your own dataset.
|
|
1213 |
| 0.1643 | 9300 | 0.4204 |
|
1214 |
| 0.1652 | 9350 | 0.5576 |
|
1215 |
| 0.1661 | 9400 | 0.4712 |
|
|
|
|
|
|
|
|
|
1216 |
|
1217 |
|
1218 |
### Framework Versions
|
|
|
1213 |
| 0.1643 | 9300 | 0.4204 |
|
1214 |
| 0.1652 | 9350 | 0.5576 |
|
1215 |
| 0.1661 | 9400 | 0.4712 |
|
1216 |
+
| 0.1670 | 9450 | 0.366 |
|
1217 |
+
| 0.1679 | 9500 | 0.3932 |
|
1218 |
+
| 0.1688 | 9550 | 0.4836 |
|
1219 |
+
| 0.1696 | 9600 | 0.3989 |
|
1220 |
|
1221 |
|
1222 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5d75f2ae04decbc38e9837063052796e9f2bb5b93ba4b3967ba78efa578f524f
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d7bd92a04e9045a8d7bd34505c04408ff4f945e28a7e521fda8627bb4a5ffb8
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4991e7b45094aac9ff0a60591e26561675ae2318a690a5d07c69cdb47d8654a9
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:498b0c6fbe9530a64ad6460f8bb9ba5c88d4dacacfc84f82ced2672249165959
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b09b7aeb1f1c748a56856918139834bbfcfbfa3a7f47fdb7435e88f5b14f5a1d
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -1324,6 +1324,34 @@
|
|
1324 |
"learning_rate": 4.6329347548644245e-05,
|
1325 |
"loss": 0.4712,
|
1326 |
"step": 9400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1327 |
}
|
1328 |
],
|
1329 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.16963828170557155,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 9600,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
1324 |
"learning_rate": 4.6329347548644245e-05,
|
1325 |
"loss": 0.4712,
|
1326 |
"step": 9400
|
1327 |
+
},
|
1328 |
+
{
|
1329 |
+
"epoch": 0.166987683553922,
|
1330 |
+
"grad_norm": 1.8109374046325684,
|
1331 |
+
"learning_rate": 4.628026153030571e-05,
|
1332 |
+
"loss": 0.366,
|
1333 |
+
"step": 9450
|
1334 |
+
},
|
1335 |
+
{
|
1336 |
+
"epoch": 0.16787121627113852,
|
1337 |
+
"grad_norm": 1.9352269172668457,
|
1338 |
+
"learning_rate": 4.6231175511967175e-05,
|
1339 |
+
"loss": 0.3932,
|
1340 |
+
"step": 9500
|
1341 |
+
},
|
1342 |
+
{
|
1343 |
+
"epoch": 0.16875474898835505,
|
1344 |
+
"grad_norm": 1.7740451097488403,
|
1345 |
+
"learning_rate": 4.618208949362864e-05,
|
1346 |
+
"loss": 0.4836,
|
1347 |
+
"step": 9550
|
1348 |
+
},
|
1349 |
+
{
|
1350 |
+
"epoch": 0.16963828170557155,
|
1351 |
+
"grad_norm": 2.0106916427612305,
|
1352 |
+
"learning_rate": 4.61330034752901e-05,
|
1353 |
+
"loss": 0.3989,
|
1354 |
+
"step": 9600
|
1355 |
}
|
1356 |
],
|
1357 |
"logging_steps": 50,
|