Training in progress, step 51600, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1187,144 +1187,13 @@ You can finetune this model on your own dataset.
|
|
1187 |
</details>
|
1188 |
|
1189 |
### Training Logs
|
1190 |
-
<details><summary>Click to expand</summary>
|
1191 |
-
|
1192 |
| Epoch | Step | Training Loss |
|
1193 |
|:------:|:-----:|:-------------:|
|
1194 |
-
| 0.
|
1195 |
-
| 0.
|
1196 |
-
| 0.
|
1197 |
-
| 0.
|
1198 |
-
| 0.7961 | 45050 | 0.2596 |
|
1199 |
-
| 0.7969 | 45100 | 0.311 |
|
1200 |
-
| 0.7978 | 45150 | 0.2368 |
|
1201 |
-
| 0.7987 | 45200 | 0.3147 |
|
1202 |
-
| 0.7996 | 45250 | 0.2964 |
|
1203 |
-
| 0.8005 | 45300 | 0.34 |
|
1204 |
-
| 0.8014 | 45350 | 0.3249 |
|
1205 |
-
| 0.8022 | 45400 | 0.4092 |
|
1206 |
-
| 0.8031 | 45450 | 0.2601 |
|
1207 |
-
| 0.8040 | 45500 | 0.306 |
|
1208 |
-
| 0.8049 | 45550 | 0.2888 |
|
1209 |
-
| 0.8058 | 45600 | 0.3101 |
|
1210 |
-
| 0.8067 | 45650 | 0.3148 |
|
1211 |
-
| 0.8075 | 45700 | 0.3975 |
|
1212 |
-
| 0.8084 | 45750 | 0.391 |
|
1213 |
-
| 0.8093 | 45800 | 0.3055 |
|
1214 |
-
| 0.8102 | 45850 | 0.2434 |
|
1215 |
-
| 0.8111 | 45900 | 0.285 |
|
1216 |
-
| 0.8120 | 45950 | 0.3952 |
|
1217 |
-
| 0.8129 | 46000 | 0.2802 |
|
1218 |
-
| 0.8137 | 46050 | 0.2687 |
|
1219 |
-
| 0.8146 | 46100 | 0.2787 |
|
1220 |
-
| 0.8155 | 46150 | 0.2943 |
|
1221 |
-
| 0.8164 | 46200 | 0.3386 |
|
1222 |
-
| 0.8173 | 46250 | 0.3227 |
|
1223 |
-
| 0.8182 | 46300 | 0.2582 |
|
1224 |
-
| 0.8190 | 46350 | 0.285 |
|
1225 |
-
| 0.8199 | 46400 | 0.2989 |
|
1226 |
-
| 0.8208 | 46450 | 0.2761 |
|
1227 |
-
| 0.8217 | 46500 | 0.299 |
|
1228 |
-
| 0.8226 | 46550 | 0.2908 |
|
1229 |
-
| 0.8235 | 46600 | 0.3134 |
|
1230 |
-
| 0.8243 | 46650 | 0.2603 |
|
1231 |
-
| 0.8252 | 46700 | 0.2965 |
|
1232 |
-
| 0.8261 | 46750 | 0.2506 |
|
1233 |
-
| 0.8270 | 46800 | 0.2258 |
|
1234 |
-
| 0.8279 | 46850 | 0.2209 |
|
1235 |
-
| 0.8288 | 46900 | 0.2756 |
|
1236 |
-
| 0.8296 | 46950 | 0.3345 |
|
1237 |
-
| 0.8305 | 47000 | 0.3016 |
|
1238 |
-
| 0.8314 | 47050 | 0.2712 |
|
1239 |
-
| 0.8323 | 47100 | 0.3721 |
|
1240 |
-
| 0.8332 | 47150 | 0.3483 |
|
1241 |
-
| 0.8341 | 47200 | 0.3002 |
|
1242 |
-
| 0.8349 | 47250 | 0.2333 |
|
1243 |
-
| 0.8358 | 47300 | 0.3043 |
|
1244 |
-
| 0.8367 | 47350 | 0.2992 |
|
1245 |
-
| 0.8376 | 47400 | 0.3367 |
|
1246 |
-
| 0.8385 | 47450 | 0.3135 |
|
1247 |
-
| 0.8394 | 47500 | 0.2681 |
|
1248 |
-
| 0.8402 | 47550 | 0.2764 |
|
1249 |
-
| 0.8411 | 47600 | 0.3211 |
|
1250 |
-
| 0.8420 | 47650 | 0.3081 |
|
1251 |
-
| 0.8429 | 47700 | 0.2929 |
|
1252 |
-
| 0.8438 | 47750 | 0.3466 |
|
1253 |
-
| 0.8447 | 47800 | 0.3012 |
|
1254 |
-
| 0.8455 | 47850 | 0.2953 |
|
1255 |
-
| 0.8464 | 47900 | 0.2914 |
|
1256 |
-
| 0.8473 | 47950 | 0.3219 |
|
1257 |
-
| 0.8482 | 48000 | 0.3104 |
|
1258 |
-
| 0.8491 | 48050 | 0.3676 |
|
1259 |
-
| 0.8500 | 48100 | 0.2989 |
|
1260 |
-
| 0.8508 | 48150 | 0.3259 |
|
1261 |
-
| 0.8517 | 48200 | 0.2949 |
|
1262 |
-
| 0.8526 | 48250 | 0.3914 |
|
1263 |
-
| 0.8535 | 48300 | 0.2645 |
|
1264 |
-
| 0.8544 | 48350 | 0.3358 |
|
1265 |
-
| 0.8553 | 48400 | 0.2939 |
|
1266 |
-
| 0.8561 | 48450 | 0.4063 |
|
1267 |
-
| 0.8570 | 48500 | 0.3492 |
|
1268 |
-
| 0.8579 | 48550 | 0.2794 |
|
1269 |
-
| 0.8588 | 48600 | 0.2854 |
|
1270 |
-
| 0.8597 | 48650 | 0.2664 |
|
1271 |
-
| 0.8606 | 48700 | 0.3028 |
|
1272 |
-
| 0.8614 | 48750 | 0.2579 |
|
1273 |
-
| 0.8623 | 48800 | 0.3404 |
|
1274 |
-
| 0.8632 | 48850 | 0.3535 |
|
1275 |
-
| 0.8641 | 48900 | 0.2224 |
|
1276 |
-
| 0.8650 | 48950 | 0.2701 |
|
1277 |
-
| 0.8659 | 49000 | 0.2506 |
|
1278 |
-
| 0.8667 | 49050 | 0.3032 |
|
1279 |
-
| 0.8676 | 49100 | 0.3881 |
|
1280 |
-
| 0.8685 | 49150 | 0.253 |
|
1281 |
-
| 0.8694 | 49200 | 0.2827 |
|
1282 |
-
| 0.8703 | 49250 | 0.266 |
|
1283 |
-
| 0.8712 | 49300 | 0.3008 |
|
1284 |
-
| 0.8720 | 49350 | 0.3406 |
|
1285 |
-
| 0.8729 | 49400 | 0.3348 |
|
1286 |
-
| 0.8738 | 49450 | 0.3021 |
|
1287 |
-
| 0.8747 | 49500 | 0.3182 |
|
1288 |
-
| 0.8756 | 49550 | 0.3063 |
|
1289 |
-
| 0.8765 | 49600 | 0.2385 |
|
1290 |
-
| 0.8773 | 49650 | 0.3152 |
|
1291 |
-
| 0.8782 | 49700 | 0.267 |
|
1292 |
-
| 0.8791 | 49750 | 0.2599 |
|
1293 |
-
| 0.8800 | 49800 | 0.3226 |
|
1294 |
-
| 0.8809 | 49850 | 0.3516 |
|
1295 |
-
| 0.8818 | 49900 | 0.2912 |
|
1296 |
-
| 0.8826 | 49950 | 0.2853 |
|
1297 |
-
| 0.8835 | 50000 | 0.3212 |
|
1298 |
-
| 0.8844 | 50050 | 0.3399 |
|
1299 |
-
| 0.8853 | 50100 | 0.3104 |
|
1300 |
-
| 0.8862 | 50150 | 0.368 |
|
1301 |
-
| 0.8871 | 50200 | 0.2848 |
|
1302 |
-
| 0.8880 | 50250 | 0.4137 |
|
1303 |
-
| 0.8888 | 50300 | 0.3597 |
|
1304 |
-
| 0.8897 | 50350 | 0.3246 |
|
1305 |
-
| 0.8906 | 50400 | 0.2658 |
|
1306 |
-
| 0.8915 | 50450 | 0.2954 |
|
1307 |
-
| 0.8924 | 50500 | 0.3035 |
|
1308 |
-
| 0.8933 | 50550 | 0.2654 |
|
1309 |
-
| 0.8941 | 50600 | 0.2742 |
|
1310 |
-
| 0.8950 | 50650 | 0.3435 |
|
1311 |
-
| 0.8959 | 50700 | 0.2947 |
|
1312 |
-
| 0.8968 | 50750 | 0.3013 |
|
1313 |
-
| 0.8977 | 50800 | 0.3845 |
|
1314 |
-
| 0.8986 | 50850 | 0.2882 |
|
1315 |
-
| 0.8994 | 50900 | 0.3639 |
|
1316 |
-
| 0.9003 | 50950 | 0.2332 |
|
1317 |
-
| 0.9012 | 51000 | 0.3363 |
|
1318 |
-
| 0.9021 | 51050 | 0.2674 |
|
1319 |
-
| 0.9030 | 51100 | 0.331 |
|
1320 |
-
| 0.9039 | 51150 | 0.2881 |
|
1321 |
-
| 0.9047 | 51200 | 0.3553 |
|
1322 |
-
| 0.9056 | 51250 | 0.2936 |
|
1323 |
-
| 0.9065 | 51300 | 0.2987 |
|
1324 |
-
| 0.9074 | 51350 | 0.3343 |
|
1325 |
-
| 0.9083 | 51400 | 0.2919 |
|
1326 |
|
1327 |
-
</details>
|
1328 |
|
1329 |
### Framework Versions
|
1330 |
- Python: 3.11.13
|
|
|
1187 |
</details>
|
1188 |
|
1189 |
### Training Logs
|
|
|
|
|
1190 |
| Epoch | Step | Training Loss |
|
1191 |
|:------:|:-----:|:-------------:|
|
1192 |
+
| 0.9092 | 51450 | 0.2997 |
|
1193 |
+
| 0.9100 | 51500 | 0.2247 |
|
1194 |
+
| 0.9109 | 51550 | 0.2918 |
|
1195 |
+
| 0.9118 | 51600 | 0.3295 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1196 |
|
|
|
1197 |
|
1198 |
### Framework Versions
|
1199 |
- Python: 3.11.13
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aeecd8d9471d7a0b6627d01abbb6ab9354f23702be741b49535069a09a804f63
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33a86535350dee0d838aabde4505a86ae7f966c06b02b476617bf119348d925b
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a589e7cc1c3f0f337824a3d6a8bc421970447f40e56c215be58c6b1d6c52a5a0
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:39475f4e00274ea95478c0aa6d264be5db3bd87ad877f934d4b5cd1f7f759e82
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b0adae6525b0259ff389512f804b95631a5bbbd0e13a53824c06deb281fe18e7
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -7204,6 +7204,34 @@
|
|
7204 |
"learning_rate": 5.1167265516090395e-06,
|
7205 |
"loss": 0.2919,
|
7206 |
"step": 51400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7207 |
}
|
7208 |
],
|
7209 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.9118057641674471,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 51600,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
7204 |
"learning_rate": 5.1167265516090395e-06,
|
7205 |
"loss": 0.2919,
|
7206 |
"step": 51400
|
7207 |
+
},
|
7208 |
+
{
|
7209 |
+
"epoch": 0.9091551660157976,
|
7210 |
+
"grad_norm": 1.3142715692520142,
|
7211 |
+
"learning_rate": 5.067640533270503e-06,
|
7212 |
+
"loss": 0.2997,
|
7213 |
+
"step": 51450
|
7214 |
+
},
|
7215 |
+
{
|
7216 |
+
"epoch": 0.9100386987330141,
|
7217 |
+
"grad_norm": 1.3387079238891602,
|
7218 |
+
"learning_rate": 5.018554514931967e-06,
|
7219 |
+
"loss": 0.2247,
|
7220 |
+
"step": 51500
|
7221 |
+
},
|
7222 |
+
{
|
7223 |
+
"epoch": 0.9109222314502307,
|
7224 |
+
"grad_norm": 1.9581636190414429,
|
7225 |
+
"learning_rate": 4.96946849659343e-06,
|
7226 |
+
"loss": 0.2918,
|
7227 |
+
"step": 51550
|
7228 |
+
},
|
7229 |
+
{
|
7230 |
+
"epoch": 0.9118057641674471,
|
7231 |
+
"grad_norm": 1.3822007179260254,
|
7232 |
+
"learning_rate": 4.920382478254894e-06,
|
7233 |
+
"loss": 0.3295,
|
7234 |
+
"step": 51600
|
7235 |
}
|
7236 |
],
|
7237 |
"logging_steps": 50,
|
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5560
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b1eb037399f5df91069f9b474c7a40c3e0d7a309caabb274d665499c0377b52
|
3 |
size 5560
|