Training in progress, step 33800, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1187,156 +1187,13 @@ You can finetune this model on your own dataset.
|
|
1187 |
</details>
|
1188 |
|
1189 |
### Training Logs
|
1190 |
-
<details><summary>Click to expand</summary>
|
1191 |
-
|
1192 |
| Epoch | Step | Training Loss |
|
1193 |
|:------:|:-----:|:-------------:|
|
1194 |
-
| 0.
|
1195 |
-
| 0.
|
1196 |
-
| 0.
|
1197 |
-
| 0.
|
1198 |
-
| 0.4709 | 26650 | 0.276 |
|
1199 |
-
| 0.4718 | 26700 | 0.2572 |
|
1200 |
-
| 0.4727 | 26750 | 0.2821 |
|
1201 |
-
| 0.4736 | 26800 | 0.2753 |
|
1202 |
-
| 0.4745 | 26850 | 0.2789 |
|
1203 |
-
| 0.4753 | 26900 | 0.5175 |
|
1204 |
-
| 0.4762 | 26950 | 0.3398 |
|
1205 |
-
| 0.4771 | 27000 | 0.3683 |
|
1206 |
-
| 0.4780 | 27050 | 0.341 |
|
1207 |
-
| 0.4789 | 27100 | 0.2753 |
|
1208 |
-
| 0.4798 | 27150 | 0.35 |
|
1209 |
-
| 0.4806 | 27200 | 0.3143 |
|
1210 |
-
| 0.4815 | 27250 | 0.3968 |
|
1211 |
-
| 0.4824 | 27300 | 0.3246 |
|
1212 |
-
| 0.4833 | 27350 | 0.3131 |
|
1213 |
-
| 0.4842 | 27400 | 0.3078 |
|
1214 |
-
| 0.4851 | 27450 | 0.419 |
|
1215 |
-
| 0.4859 | 27500 | 0.3959 |
|
1216 |
-
| 0.4868 | 27550 | 0.3754 |
|
1217 |
-
| 0.4877 | 27600 | 0.3163 |
|
1218 |
-
| 0.4886 | 27650 | 0.35 |
|
1219 |
-
| 0.4895 | 27700 | 0.3397 |
|
1220 |
-
| 0.4904 | 27750 | 0.3853 |
|
1221 |
-
| 0.4912 | 27800 | 0.2568 |
|
1222 |
-
| 0.4921 | 27850 | 0.3108 |
|
1223 |
-
| 0.4930 | 27900 | 0.4098 |
|
1224 |
-
| 0.4939 | 27950 | 0.3364 |
|
1225 |
-
| 0.4948 | 28000 | 0.3354 |
|
1226 |
-
| 0.4957 | 28050 | 0.2879 |
|
1227 |
-
| 0.4965 | 28100 | 0.3604 |
|
1228 |
-
| 0.4974 | 28150 | 0.2612 |
|
1229 |
-
| 0.4983 | 28200 | 0.3593 |
|
1230 |
-
| 0.4992 | 28250 | 0.2961 |
|
1231 |
-
| 0.5001 | 28300 | 0.3338 |
|
1232 |
-
| 0.5010 | 28350 | 0.3109 |
|
1233 |
-
| 0.5018 | 28400 | 0.3234 |
|
1234 |
-
| 0.5027 | 28450 | 0.3257 |
|
1235 |
-
| 0.5036 | 28500 | 0.4675 |
|
1236 |
-
| 0.5045 | 28550 | 0.4318 |
|
1237 |
-
| 0.5054 | 28600 | 0.3594 |
|
1238 |
-
| 0.5063 | 28650 | 0.3214 |
|
1239 |
-
| 0.5071 | 28700 | 0.2856 |
|
1240 |
-
| 0.5080 | 28750 | 0.3094 |
|
1241 |
-
| 0.5089 | 28800 | 0.3933 |
|
1242 |
-
| 0.5098 | 28850 | 0.3432 |
|
1243 |
-
| 0.5107 | 28900 | 0.3766 |
|
1244 |
-
| 0.5116 | 28950 | 0.3308 |
|
1245 |
-
| 0.5124 | 29000 | 0.3453 |
|
1246 |
-
| 0.5133 | 29050 | 0.2904 |
|
1247 |
-
| 0.5142 | 29100 | 0.2647 |
|
1248 |
-
| 0.5151 | 29150 | 0.4395 |
|
1249 |
-
| 0.5160 | 29200 | 0.295 |
|
1250 |
-
| 0.5169 | 29250 | 0.3927 |
|
1251 |
-
| 0.5178 | 29300 | 0.3492 |
|
1252 |
-
| 0.5186 | 29350 | 0.3304 |
|
1253 |
-
| 0.5195 | 29400 | 0.3557 |
|
1254 |
-
| 0.5204 | 29450 | 0.3389 |
|
1255 |
-
| 0.5213 | 29500 | 0.3322 |
|
1256 |
-
| 0.5222 | 29550 | 0.3053 |
|
1257 |
-
| 0.5231 | 29600 | 0.2486 |
|
1258 |
-
| 0.5239 | 29650 | 0.282 |
|
1259 |
-
| 0.5248 | 29700 | 0.3791 |
|
1260 |
-
| 0.5257 | 29750 | 0.3346 |
|
1261 |
-
| 0.5266 | 29800 | 0.2743 |
|
1262 |
-
| 0.5275 | 29850 | 0.2927 |
|
1263 |
-
| 0.5284 | 29900 | 0.3775 |
|
1264 |
-
| 0.5292 | 29950 | 0.3114 |
|
1265 |
-
| 0.5301 | 30000 | 0.2383 |
|
1266 |
-
| 0.5310 | 30050 | 0.3798 |
|
1267 |
-
| 0.5319 | 30100 | 0.3204 |
|
1268 |
-
| 0.5328 | 30150 | 0.2496 |
|
1269 |
-
| 0.5337 | 30200 | 0.4147 |
|
1270 |
-
| 0.5345 | 30250 | 0.3021 |
|
1271 |
-
| 0.5354 | 30300 | 0.2758 |
|
1272 |
-
| 0.5363 | 30350 | 0.3166 |
|
1273 |
-
| 0.5372 | 30400 | 0.35 |
|
1274 |
-
| 0.5381 | 30450 | 0.3391 |
|
1275 |
-
| 0.5390 | 30500 | 0.3576 |
|
1276 |
-
| 0.5398 | 30550 | 0.295 |
|
1277 |
-
| 0.5407 | 30600 | 0.3449 |
|
1278 |
-
| 0.5416 | 30650 | 0.3274 |
|
1279 |
-
| 0.5425 | 30700 | 0.3094 |
|
1280 |
-
| 0.5434 | 30750 | 0.3077 |
|
1281 |
-
| 0.5443 | 30800 | 0.3505 |
|
1282 |
-
| 0.5451 | 30850 | 0.3485 |
|
1283 |
-
| 0.5460 | 30900 | 0.331 |
|
1284 |
-
| 0.5469 | 30950 | 0.2846 |
|
1285 |
-
| 0.5478 | 31000 | 0.3647 |
|
1286 |
-
| 0.5487 | 31050 | 0.3475 |
|
1287 |
-
| 0.5496 | 31100 | 0.2833 |
|
1288 |
-
| 0.5504 | 31150 | 0.3 |
|
1289 |
-
| 0.5513 | 31200 | 0.3568 |
|
1290 |
-
| 0.5522 | 31250 | 0.3268 |
|
1291 |
-
| 0.5531 | 31300 | 0.4005 |
|
1292 |
-
| 0.5540 | 31350 | 0.2993 |
|
1293 |
-
| 0.5549 | 31400 | 0.3463 |
|
1294 |
-
| 0.5557 | 31450 | 0.3654 |
|
1295 |
-
| 0.5566 | 31500 | 0.3329 |
|
1296 |
-
| 0.5575 | 31550 | 0.2794 |
|
1297 |
-
| 0.5584 | 31600 | 0.4189 |
|
1298 |
-
| 0.5593 | 31650 | 0.3643 |
|
1299 |
-
| 0.5602 | 31700 | 0.3578 |
|
1300 |
-
| 0.5610 | 31750 | 0.3193 |
|
1301 |
-
| 0.5619 | 31800 | 0.327 |
|
1302 |
-
| 0.5628 | 31850 | 0.3429 |
|
1303 |
-
| 0.5637 | 31900 | 0.2994 |
|
1304 |
-
| 0.5646 | 31950 | 0.3219 |
|
1305 |
-
| 0.5655 | 32000 | 0.2902 |
|
1306 |
-
| 0.5663 | 32050 | 0.3896 |
|
1307 |
-
| 0.5672 | 32100 | 0.2491 |
|
1308 |
-
| 0.5681 | 32150 | 0.2663 |
|
1309 |
-
| 0.5690 | 32200 | 0.3433 |
|
1310 |
-
| 0.5699 | 32250 | 0.3375 |
|
1311 |
-
| 0.5708 | 32300 | 0.2891 |
|
1312 |
-
| 0.5716 | 32350 | 0.296 |
|
1313 |
-
| 0.5725 | 32400 | 0.2478 |
|
1314 |
-
| 0.5734 | 32450 | 0.3514 |
|
1315 |
-
| 0.5743 | 32500 | 0.2741 |
|
1316 |
-
| 0.5752 | 32550 | 0.3546 |
|
1317 |
-
| 0.5761 | 32600 | 0.3927 |
|
1318 |
-
| 0.5769 | 32650 | 0.2725 |
|
1319 |
-
| 0.5778 | 32700 | 0.3167 |
|
1320 |
-
| 0.5787 | 32750 | 0.3249 |
|
1321 |
-
| 0.5796 | 32800 | 0.2443 |
|
1322 |
-
| 0.5805 | 32850 | 0.4113 |
|
1323 |
-
| 0.5814 | 32900 | 0.3106 |
|
1324 |
-
| 0.5822 | 32950 | 0.2841 |
|
1325 |
-
| 0.5831 | 33000 | 0.2786 |
|
1326 |
-
| 0.5840 | 33050 | 0.3576 |
|
1327 |
-
| 0.5849 | 33100 | 0.2475 |
|
1328 |
-
| 0.5858 | 33150 | 0.348 |
|
1329 |
-
| 0.5867 | 33200 | 0.2779 |
|
1330 |
-
| 0.5875 | 33250 | 0.3166 |
|
1331 |
-
| 0.5884 | 33300 | 0.3448 |
|
1332 |
-
| 0.5893 | 33350 | 0.2409 |
|
1333 |
-
| 0.5902 | 33400 | 0.3313 |
|
1334 |
-
| 0.5911 | 33450 | 0.2981 |
|
1335 |
-
| 0.5920 | 33500 | 0.269 |
|
1336 |
-
| 0.5929 | 33550 | 0.4098 |
|
1337 |
-
| 0.5937 | 33600 | 0.2924 |
|
1338 |
|
1339 |
-
</details>
|
1340 |
|
1341 |
### Framework Versions
|
1342 |
- Python: 3.11.13
|
|
|
1187 |
</details>
|
1188 |
|
1189 |
### Training Logs
|
|
|
|
|
1190 |
| Epoch | Step | Training Loss |
|
1191 |
|:------:|:-----:|:-------------:|
|
1192 |
+
| 0.5946 | 33650 | 0.2952 |
|
1193 |
+
| 0.5955 | 33700 | 0.2754 |
|
1194 |
+
| 0.5964 | 33750 | 0.3434 |
|
1195 |
+
| 0.5973 | 33800 | 0.2541 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1196 |
|
|
|
1197 |
|
1198 |
### Framework Versions
|
1199 |
- Python: 3.11.13
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5f44c393193a7debcaa9fe116b2e33229c62b31c7228329c28ad0491e8701e1a
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:620d6a0f0e8b3c0b610d1a9d8d426a0e427c22a7ec7ed38356be652403968d4e
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4ba676fcdff1c56d78801dd51dede93231c1d56645e7877743bcba848aec097a
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b205c3d2c6273622b727175547a26240710159f4256bcd6246156ce73b10ee3f
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ff383a89d52f883cafae387eb5146463fb8074271a1417f846316599e03e648
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -4712,6 +4712,34 @@
|
|
4712 |
"learning_rate": 2.258251359682708e-05,
|
4713 |
"loss": 0.2924,
|
4714 |
"step": 33600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4715 |
}
|
4716 |
],
|
4717 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.5972681168383666,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 33800,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
4712 |
"learning_rate": 2.258251359682708e-05,
|
4713 |
"loss": 0.2924,
|
4714 |
"step": 33600
|
4715 |
+
},
|
4716 |
+
{
|
4717 |
+
"epoch": 0.594617518686717,
|
4718 |
+
"grad_norm": 2.0076584815979004,
|
4719 |
+
"learning_rate": 2.2533427578488545e-05,
|
4720 |
+
"loss": 0.2952,
|
4721 |
+
"step": 33650
|
4722 |
+
},
|
4723 |
+
{
|
4724 |
+
"epoch": 0.5955010514039335,
|
4725 |
+
"grad_norm": 1.203574299812317,
|
4726 |
+
"learning_rate": 2.2484341560150006e-05,
|
4727 |
+
"loss": 0.2754,
|
4728 |
+
"step": 33700
|
4729 |
+
},
|
4730 |
+
{
|
4731 |
+
"epoch": 0.59638458412115,
|
4732 |
+
"grad_norm": 2.815420150756836,
|
4733 |
+
"learning_rate": 2.243525554181147e-05,
|
4734 |
+
"loss": 0.3434,
|
4735 |
+
"step": 33750
|
4736 |
+
},
|
4737 |
+
{
|
4738 |
+
"epoch": 0.5972681168383666,
|
4739 |
+
"grad_norm": 1.487236499786377,
|
4740 |
+
"learning_rate": 2.2386169523472935e-05,
|
4741 |
+
"loss": 0.2541,
|
4742 |
+
"step": 33800
|
4743 |
}
|
4744 |
],
|
4745 |
"logging_steps": 50,
|
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5560
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:25a21a534e4993b863994e64d84a120efcce8aac5f212cbacbdb8f1e5edfbb2e
|
3 |
size 5560
|