Training in progress, step 26600, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -1187,376 +1187,13 @@ You can finetune this model on your own dataset.
|
|
1187 |
</details>
|
1188 |
|
1189 |
### Training Logs
|
1190 |
-
<details><summary>Click to expand</summary>
|
1191 |
-
|
1192 |
| Epoch | Step | Training Loss |
|
1193 |
|:------:|:-----:|:-------------:|
|
1194 |
-
| 0.
|
1195 |
-
| 0.
|
1196 |
-
| 0.
|
1197 |
-
| 0.
|
1198 |
-
| 0.1493 | 8450 | 0.398 |
|
1199 |
-
| 0.1502 | 8500 | 0.4203 |
|
1200 |
-
| 0.1511 | 8550 | 0.414 |
|
1201 |
-
| 0.1520 | 8600 | 0.3436 |
|
1202 |
-
| 0.1529 | 8650 | 0.436 |
|
1203 |
-
| 0.1537 | 8700 | 0.3761 |
|
1204 |
-
| 0.1546 | 8750 | 0.5579 |
|
1205 |
-
| 0.1555 | 8800 | 0.441 |
|
1206 |
-
| 0.1564 | 8850 | 0.5877 |
|
1207 |
-
| 0.1573 | 8900 | 0.4081 |
|
1208 |
-
| 0.1582 | 8950 | 0.4648 |
|
1209 |
-
| 0.1590 | 9000 | 0.4321 |
|
1210 |
-
| 0.1599 | 9050 | 0.4226 |
|
1211 |
-
| 0.1608 | 9100 | 0.3634 |
|
1212 |
-
| 0.1617 | 9150 | 0.4252 |
|
1213 |
-
| 0.1626 | 9200 | 0.3899 |
|
1214 |
-
| 0.1635 | 9250 | 0.4335 |
|
1215 |
-
| 0.1643 | 9300 | 0.4204 |
|
1216 |
-
| 0.1652 | 9350 | 0.5576 |
|
1217 |
-
| 0.1661 | 9400 | 0.4712 |
|
1218 |
-
| 0.1670 | 9450 | 0.366 |
|
1219 |
-
| 0.1679 | 9500 | 0.3932 |
|
1220 |
-
| 0.1688 | 9550 | 0.4836 |
|
1221 |
-
| 0.1696 | 9600 | 0.3989 |
|
1222 |
-
| 0.1705 | 9650 | 0.4025 |
|
1223 |
-
| 0.1714 | 9700 | 0.467 |
|
1224 |
-
| 0.1723 | 9750 | 0.3558 |
|
1225 |
-
| 0.1732 | 9800 | 0.3623 |
|
1226 |
-
| 0.1741 | 9850 | 0.3438 |
|
1227 |
-
| 0.1749 | 9900 | 0.4193 |
|
1228 |
-
| 0.1758 | 9950 | 0.3173 |
|
1229 |
-
| 0.1767 | 10000 | 0.4569 |
|
1230 |
-
| 0.1776 | 10050 | 0.4538 |
|
1231 |
-
| 0.1785 | 10100 | 0.4422 |
|
1232 |
-
| 0.1794 | 10150 | 0.3747 |
|
1233 |
-
| 0.1802 | 10200 | 0.3989 |
|
1234 |
-
| 0.1811 | 10250 | 0.5315 |
|
1235 |
-
| 0.1820 | 10300 | 0.3565 |
|
1236 |
-
| 0.1829 | 10350 | 0.4409 |
|
1237 |
-
| 0.1838 | 10400 | 0.3762 |
|
1238 |
-
| 0.1847 | 10450 | 0.4571 |
|
1239 |
-
| 0.1855 | 10500 | 0.3561 |
|
1240 |
-
| 0.1864 | 10550 | 0.4595 |
|
1241 |
-
| 0.1873 | 10600 | 0.4576 |
|
1242 |
-
| 0.1882 | 10650 | 0.4957 |
|
1243 |
-
| 0.1891 | 10700 | 0.3533 |
|
1244 |
-
| 0.1900 | 10750 | 0.5213 |
|
1245 |
-
| 0.1908 | 10800 | 0.4372 |
|
1246 |
-
| 0.1917 | 10850 | 0.3286 |
|
1247 |
-
| 0.1926 | 10900 | 0.4082 |
|
1248 |
-
| 0.1935 | 10950 | 0.4056 |
|
1249 |
-
| 0.1944 | 11000 | 0.4435 |
|
1250 |
-
| 0.1953 | 11050 | 0.4272 |
|
1251 |
-
| 0.1961 | 11100 | 0.4334 |
|
1252 |
-
| 0.1970 | 11150 | 0.4479 |
|
1253 |
-
| 0.1979 | 11200 | 0.3545 |
|
1254 |
-
| 0.1988 | 11250 | 0.3192 |
|
1255 |
-
| 0.1997 | 11300 | 0.3883 |
|
1256 |
-
| 0.2006 | 11350 | 0.3312 |
|
1257 |
-
| 0.2014 | 11400 | 0.4888 |
|
1258 |
-
| 0.2023 | 11450 | 0.5102 |
|
1259 |
-
| 0.2032 | 11500 | 0.4133 |
|
1260 |
-
| 0.2041 | 11550 | 0.4255 |
|
1261 |
-
| 0.2050 | 11600 | 0.3766 |
|
1262 |
-
| 0.2059 | 11650 | 0.3651 |
|
1263 |
-
| 0.2067 | 11700 | 0.4562 |
|
1264 |
-
| 0.2076 | 11750 | 0.3946 |
|
1265 |
-
| 0.2085 | 11800 | 0.4075 |
|
1266 |
-
| 0.2094 | 11850 | 0.4304 |
|
1267 |
-
| 0.2103 | 11900 | 0.3404 |
|
1268 |
-
| 0.2112 | 11950 | 0.4013 |
|
1269 |
-
| 0.2120 | 12000 | 0.4278 |
|
1270 |
-
| 0.2129 | 12050 | 0.3905 |
|
1271 |
-
| 0.2138 | 12100 | 0.4082 |
|
1272 |
-
| 0.2147 | 12150 | 0.3855 |
|
1273 |
-
| 0.2156 | 12200 | 0.4567 |
|
1274 |
-
| 0.2165 | 12250 | 0.4368 |
|
1275 |
-
| 0.2173 | 12300 | 0.3686 |
|
1276 |
-
| 0.2182 | 12350 | 0.4163 |
|
1277 |
-
| 0.2191 | 12400 | 0.3595 |
|
1278 |
-
| 0.2200 | 12450 | 0.3326 |
|
1279 |
-
| 0.2209 | 12500 | 0.3775 |
|
1280 |
-
| 0.2218 | 12550 | 0.3695 |
|
1281 |
-
| 0.2227 | 12600 | 0.3545 |
|
1282 |
-
| 0.2235 | 12650 | 0.3548 |
|
1283 |
-
| 0.2244 | 12700 | 0.4847 |
|
1284 |
-
| 0.2253 | 12750 | 0.4 |
|
1285 |
-
| 0.2262 | 12800 | 0.4755 |
|
1286 |
-
| 0.2271 | 12850 | 0.3399 |
|
1287 |
-
| 0.2280 | 12900 | 0.3297 |
|
1288 |
-
| 0.2288 | 12950 | 0.4071 |
|
1289 |
-
| 0.2297 | 13000 | 0.4069 |
|
1290 |
-
| 0.2306 | 13050 | 0.5156 |
|
1291 |
-
| 0.2315 | 13100 | 0.4359 |
|
1292 |
-
| 0.2324 | 13150 | 0.3702 |
|
1293 |
-
| 0.2333 | 13200 | 0.5026 |
|
1294 |
-
| 0.2341 | 13250 | 0.5201 |
|
1295 |
-
| 0.2350 | 13300 | 0.3857 |
|
1296 |
-
| 0.2359 | 13350 | 0.3555 |
|
1297 |
-
| 0.2368 | 13400 | 0.381 |
|
1298 |
-
| 0.2377 | 13450 | 0.3804 |
|
1299 |
-
| 0.2386 | 13500 | 0.3666 |
|
1300 |
-
| 0.2394 | 13550 | 0.3792 |
|
1301 |
-
| 0.2403 | 13600 | 0.3341 |
|
1302 |
-
| 0.2412 | 13650 | 0.4906 |
|
1303 |
-
| 0.2421 | 13700 | 0.541 |
|
1304 |
-
| 0.2430 | 13750 | 0.3936 |
|
1305 |
-
| 0.2439 | 13800 | 0.3871 |
|
1306 |
-
| 0.2447 | 13850 | 0.2983 |
|
1307 |
-
| 0.2456 | 13900 | 0.4071 |
|
1308 |
-
| 0.2465 | 13950 | 0.3434 |
|
1309 |
-
| 0.2474 | 14000 | 0.3885 |
|
1310 |
-
| 0.2483 | 14050 | 0.4296 |
|
1311 |
-
| 0.2492 | 14100 | 0.3853 |
|
1312 |
-
| 0.2500 | 14150 | 0.4068 |
|
1313 |
-
| 0.2509 | 14200 | 0.4071 |
|
1314 |
-
| 0.2518 | 14250 | 0.3588 |
|
1315 |
-
| 0.2527 | 14300 | 0.4161 |
|
1316 |
-
| 0.2536 | 14350 | 0.3431 |
|
1317 |
-
| 0.2545 | 14400 | 0.3576 |
|
1318 |
-
| 0.2553 | 14450 | 0.3563 |
|
1319 |
-
| 0.2562 | 14500 | 0.3406 |
|
1320 |
-
| 0.2571 | 14550 | 0.4397 |
|
1321 |
-
| 0.2580 | 14600 | 0.411 |
|
1322 |
-
| 0.2589 | 14650 | 0.3742 |
|
1323 |
-
| 0.2598 | 14700 | 0.3622 |
|
1324 |
-
| 0.2606 | 14750 | 0.2782 |
|
1325 |
-
| 0.2615 | 14800 | 0.36 |
|
1326 |
-
| 0.2624 | 14850 | 0.486 |
|
1327 |
-
| 0.2633 | 14900 | 0.406 |
|
1328 |
-
| 0.2642 | 14950 | 0.357 |
|
1329 |
-
| 0.2651 | 15000 | 0.2855 |
|
1330 |
-
| 0.2659 | 15050 | 0.4424 |
|
1331 |
-
| 0.2668 | 15100 | 0.352 |
|
1332 |
-
| 0.2677 | 15150 | 0.3638 |
|
1333 |
-
| 0.2686 | 15200 | 0.3727 |
|
1334 |
-
| 0.2695 | 15250 | 0.3842 |
|
1335 |
-
| 0.2704 | 15300 | 0.3625 |
|
1336 |
-
| 0.2712 | 15350 | 0.4088 |
|
1337 |
-
| 0.2721 | 15400 | 0.4126 |
|
1338 |
-
| 0.2730 | 15450 | 0.4662 |
|
1339 |
-
| 0.2739 | 15500 | 0.3889 |
|
1340 |
-
| 0.2748 | 15550 | 0.3618 |
|
1341 |
-
| 0.2757 | 15600 | 0.4126 |
|
1342 |
-
| 0.2765 | 15650 | 0.3771 |
|
1343 |
-
| 0.2774 | 15700 | 0.4377 |
|
1344 |
-
| 0.2783 | 15750 | 0.4041 |
|
1345 |
-
| 0.2792 | 15800 | 0.375 |
|
1346 |
-
| 0.2801 | 15850 | 0.3339 |
|
1347 |
-
| 0.2810 | 15900 | 0.348 |
|
1348 |
-
| 0.2818 | 15950 | 0.367 |
|
1349 |
-
| 0.2827 | 16000 | 0.3427 |
|
1350 |
-
| 0.2836 | 16050 | 0.3637 |
|
1351 |
-
| 0.2845 | 16100 | 0.3489 |
|
1352 |
-
| 0.2854 | 16150 | 0.4209 |
|
1353 |
-
| 0.2863 | 16200 | 0.2984 |
|
1354 |
-
| 0.2871 | 16250 | 0.3877 |
|
1355 |
-
| 0.2880 | 16300 | 0.3508 |
|
1356 |
-
| 0.2889 | 16350 | 0.3443 |
|
1357 |
-
| 0.2898 | 16400 | 0.4346 |
|
1358 |
-
| 0.2907 | 16450 | 0.4262 |
|
1359 |
-
| 0.2916 | 16500 | 0.3065 |
|
1360 |
-
| 0.2924 | 16550 | 0.4003 |
|
1361 |
-
| 0.2933 | 16600 | 0.5295 |
|
1362 |
-
| 0.2942 | 16650 | 0.3582 |
|
1363 |
-
| 0.2951 | 16700 | 0.3299 |
|
1364 |
-
| 0.2960 | 16750 | 0.3493 |
|
1365 |
-
| 0.2969 | 16800 | 0.2752 |
|
1366 |
-
| 0.2978 | 16850 | 0.3254 |
|
1367 |
-
| 0.2986 | 16900 | 0.3352 |
|
1368 |
-
| 0.2995 | 16950 | 0.3812 |
|
1369 |
-
| 0.3004 | 17000 | 0.3332 |
|
1370 |
-
| 0.3013 | 17050 | 0.3472 |
|
1371 |
-
| 0.3022 | 17100 | 0.4207 |
|
1372 |
-
| 0.3031 | 17150 | 0.3849 |
|
1373 |
-
| 0.3039 | 17200 | 0.3272 |
|
1374 |
-
| 0.3048 | 17250 | 0.3037 |
|
1375 |
-
| 0.3057 | 17300 | 0.2924 |
|
1376 |
-
| 0.3066 | 17350 | 0.4029 |
|
1377 |
-
| 0.3075 | 17400 | 0.5016 |
|
1378 |
-
| 0.3084 | 17450 | 0.3648 |
|
1379 |
-
| 0.3092 | 17500 | 0.3045 |
|
1380 |
-
| 0.3101 | 17550 | 0.3271 |
|
1381 |
-
| 0.3110 | 17600 | 0.519 |
|
1382 |
-
| 0.3119 | 17650 | 0.341 |
|
1383 |
-
| 0.3128 | 17700 | 0.3352 |
|
1384 |
-
| 0.3137 | 17750 | 0.4406 |
|
1385 |
-
| 0.3145 | 17800 | 0.3822 |
|
1386 |
-
| 0.3154 | 17850 | 0.287 |
|
1387 |
-
| 0.3163 | 17900 | 0.4349 |
|
1388 |
-
| 0.3172 | 17950 | 0.3633 |
|
1389 |
-
| 0.3181 | 18000 | 0.3431 |
|
1390 |
-
| 0.3190 | 18050 | 0.3986 |
|
1391 |
-
| 0.3198 | 18100 | 0.3279 |
|
1392 |
-
| 0.3207 | 18150 | 0.3062 |
|
1393 |
-
| 0.3216 | 18200 | 0.2973 |
|
1394 |
-
| 0.3225 | 18250 | 0.4078 |
|
1395 |
-
| 0.3234 | 18300 | 0.31 |
|
1396 |
-
| 0.3243 | 18350 | 0.306 |
|
1397 |
-
| 0.3251 | 18400 | 0.3426 |
|
1398 |
-
| 0.3260 | 18450 | 0.2807 |
|
1399 |
-
| 0.3269 | 18500 | 0.3856 |
|
1400 |
-
| 0.3278 | 18550 | 0.3575 |
|
1401 |
-
| 0.3287 | 18600 | 0.347 |
|
1402 |
-
| 0.3296 | 18650 | 0.3195 |
|
1403 |
-
| 0.3304 | 18700 | 0.3543 |
|
1404 |
-
| 0.3313 | 18750 | 0.3642 |
|
1405 |
-
| 0.3322 | 18800 | 0.3415 |
|
1406 |
-
| 0.3331 | 18850 | 0.3069 |
|
1407 |
-
| 0.3340 | 18900 | 0.2962 |
|
1408 |
-
| 0.3349 | 18950 | 0.3156 |
|
1409 |
-
| 0.3357 | 19000 | 0.3791 |
|
1410 |
-
| 0.3366 | 19050 | 0.3287 |
|
1411 |
-
| 0.3375 | 19100 | 0.4034 |
|
1412 |
-
| 0.3384 | 19150 | 0.3476 |
|
1413 |
-
| 0.3393 | 19200 | 0.363 |
|
1414 |
-
| 0.3402 | 19250 | 0.3182 |
|
1415 |
-
| 0.3410 | 19300 | 0.3238 |
|
1416 |
-
| 0.3419 | 19350 | 0.3043 |
|
1417 |
-
| 0.3428 | 19400 | 0.3902 |
|
1418 |
-
| 0.3437 | 19450 | 0.3085 |
|
1419 |
-
| 0.3446 | 19500 | 0.3162 |
|
1420 |
-
| 0.3455 | 19550 | 0.345 |
|
1421 |
-
| 0.3463 | 19600 | 0.3017 |
|
1422 |
-
| 0.3472 | 19650 | 0.4232 |
|
1423 |
-
| 0.3481 | 19700 | 0.3363 |
|
1424 |
-
| 0.3490 | 19750 | 0.3484 |
|
1425 |
-
| 0.3499 | 19800 | 0.3719 |
|
1426 |
-
| 0.3508 | 19850 | 0.3226 |
|
1427 |
-
| 0.3516 | 19900 | 0.3636 |
|
1428 |
-
| 0.3525 | 19950 | 0.3704 |
|
1429 |
-
| 0.3534 | 20000 | 0.3459 |
|
1430 |
-
| 0.3543 | 20050 | 0.3226 |
|
1431 |
-
| 0.3552 | 20100 | 0.3256 |
|
1432 |
-
| 0.3561 | 20150 | 0.3179 |
|
1433 |
-
| 0.3569 | 20200 | 0.4226 |
|
1434 |
-
| 0.3578 | 20250 | 0.4196 |
|
1435 |
-
| 0.3587 | 20300 | 0.3618 |
|
1436 |
-
| 0.3596 | 20350 | 0.4093 |
|
1437 |
-
| 0.3605 | 20400 | 0.3051 |
|
1438 |
-
| 0.3614 | 20450 | 0.3379 |
|
1439 |
-
| 0.3622 | 20500 | 0.3582 |
|
1440 |
-
| 0.3631 | 20550 | 0.4004 |
|
1441 |
-
| 0.3640 | 20600 | 0.33 |
|
1442 |
-
| 0.3649 | 20650 | 0.3072 |
|
1443 |
-
| 0.3658 | 20700 | 0.35 |
|
1444 |
-
| 0.3667 | 20750 | 0.3241 |
|
1445 |
-
| 0.3675 | 20800 | 0.37 |
|
1446 |
-
| 0.3684 | 20850 | 0.2689 |
|
1447 |
-
| 0.3693 | 20900 | 0.4061 |
|
1448 |
-
| 0.3702 | 20950 | 0.3412 |
|
1449 |
-
| 0.3711 | 21000 | 0.3619 |
|
1450 |
-
| 0.3720 | 21050 | 0.314 |
|
1451 |
-
| 0.3729 | 21100 | 0.2912 |
|
1452 |
-
| 0.3737 | 21150 | 0.3402 |
|
1453 |
-
| 0.3746 | 21200 | 0.3462 |
|
1454 |
-
| 0.3755 | 21250 | 0.2775 |
|
1455 |
-
| 0.3764 | 21300 | 0.3825 |
|
1456 |
-
| 0.3773 | 21350 | 0.3931 |
|
1457 |
-
| 0.3782 | 21400 | 0.4059 |
|
1458 |
-
| 0.3790 | 21450 | 0.3264 |
|
1459 |
-
| 0.3799 | 21500 | 0.2605 |
|
1460 |
-
| 0.3808 | 21550 | 0.3658 |
|
1461 |
-
| 0.3817 | 21600 | 0.3274 |
|
1462 |
-
| 0.3826 | 21650 | 0.3094 |
|
1463 |
-
| 0.3835 | 21700 | 0.3556 |
|
1464 |
-
| 0.3843 | 21750 | 0.2877 |
|
1465 |
-
| 0.3852 | 21800 | 0.3203 |
|
1466 |
-
| 0.3861 | 21850 | 0.2553 |
|
1467 |
-
| 0.3870 | 21900 | 0.4425 |
|
1468 |
-
| 0.3879 | 21950 | 0.3249 |
|
1469 |
-
| 0.3888 | 22000 | 0.3649 |
|
1470 |
-
| 0.3896 | 22050 | 0.3621 |
|
1471 |
-
| 0.3905 | 22100 | 0.4232 |
|
1472 |
-
| 0.3914 | 22150 | 0.266 |
|
1473 |
-
| 0.3923 | 22200 | 0.3767 |
|
1474 |
-
| 0.3932 | 22250 | 0.366 |
|
1475 |
-
| 0.3941 | 22300 | 0.3211 |
|
1476 |
-
| 0.3949 | 22350 | 0.2505 |
|
1477 |
-
| 0.3958 | 22400 | 0.3824 |
|
1478 |
-
| 0.3967 | 22450 | 0.331 |
|
1479 |
-
| 0.3976 | 22500 | 0.3136 |
|
1480 |
-
| 0.3985 | 22550 | 0.297 |
|
1481 |
-
| 0.3994 | 22600 | 0.2933 |
|
1482 |
-
| 0.4002 | 22650 | 0.4026 |
|
1483 |
-
| 0.4011 | 22700 | 0.3567 |
|
1484 |
-
| 0.4020 | 22750 | 0.3711 |
|
1485 |
-
| 0.4029 | 22800 | 0.378 |
|
1486 |
-
| 0.4038 | 22850 | 0.355 |
|
1487 |
-
| 0.4047 | 22900 | 0.3044 |
|
1488 |
-
| 0.4055 | 22950 | 0.357 |
|
1489 |
-
| 0.4064 | 23000 | 0.3616 |
|
1490 |
-
| 0.4073 | 23050 | 0.3139 |
|
1491 |
-
| 0.4082 | 23100 | 0.3474 |
|
1492 |
-
| 0.4091 | 23150 | 0.3208 |
|
1493 |
-
| 0.4100 | 23200 | 0.3798 |
|
1494 |
-
| 0.4108 | 23250 | 0.3282 |
|
1495 |
-
| 0.4117 | 23300 | 0.3302 |
|
1496 |
-
| 0.4126 | 23350 | 0.3599 |
|
1497 |
-
| 0.4135 | 23400 | 0.3608 |
|
1498 |
-
| 0.4144 | 23450 | 0.3387 |
|
1499 |
-
| 0.4153 | 23500 | 0.3987 |
|
1500 |
-
| 0.4161 | 23550 | 0.3387 |
|
1501 |
-
| 0.4170 | 23600 | 0.2989 |
|
1502 |
-
| 0.4179 | 23650 | 0.2629 |
|
1503 |
-
| 0.4188 | 23700 | 0.2905 |
|
1504 |
-
| 0.4197 | 23750 | 0.3234 |
|
1505 |
-
| 0.4206 | 23800 | 0.3063 |
|
1506 |
-
| 0.4214 | 23850 | 0.3386 |
|
1507 |
-
| 0.4223 | 23900 | 0.3431 |
|
1508 |
-
| 0.4232 | 23950 | 0.2902 |
|
1509 |
-
| 0.4241 | 24000 | 0.3136 |
|
1510 |
-
| 0.4250 | 24050 | 0.4001 |
|
1511 |
-
| 0.4259 | 24100 | 0.285 |
|
1512 |
-
| 0.4267 | 24150 | 0.4717 |
|
1513 |
-
| 0.4276 | 24200 | 0.3391 |
|
1514 |
-
| 0.4285 | 24250 | 0.2913 |
|
1515 |
-
| 0.4294 | 24300 | 0.3417 |
|
1516 |
-
| 0.4303 | 24350 | 0.2909 |
|
1517 |
-
| 0.4312 | 24400 | 0.2871 |
|
1518 |
-
| 0.4320 | 24450 | 0.3929 |
|
1519 |
-
| 0.4329 | 24500 | 0.278 |
|
1520 |
-
| 0.4338 | 24550 | 0.4386 |
|
1521 |
-
| 0.4347 | 24600 | 0.4193 |
|
1522 |
-
| 0.4356 | 24650 | 0.367 |
|
1523 |
-
| 0.4365 | 24700 | 0.3474 |
|
1524 |
-
| 0.4373 | 24750 | 0.3116 |
|
1525 |
-
| 0.4382 | 24800 | 0.2733 |
|
1526 |
-
| 0.4391 | 24850 | 0.3046 |
|
1527 |
-
| 0.4400 | 24900 | 0.3627 |
|
1528 |
-
| 0.4409 | 24950 | 0.3403 |
|
1529 |
-
| 0.4418 | 25000 | 0.349 |
|
1530 |
-
| 0.4426 | 25050 | 0.3527 |
|
1531 |
-
| 0.4435 | 25100 | 0.3773 |
|
1532 |
-
| 0.4444 | 25150 | 0.3372 |
|
1533 |
-
| 0.4453 | 25200 | 0.3178 |
|
1534 |
-
| 0.4462 | 25250 | 0.2745 |
|
1535 |
-
| 0.4471 | 25300 | 0.2773 |
|
1536 |
-
| 0.4480 | 25350 | 0.3822 |
|
1537 |
-
| 0.4488 | 25400 | 0.3851 |
|
1538 |
-
| 0.4497 | 25450 | 0.3805 |
|
1539 |
-
| 0.4506 | 25500 | 0.3245 |
|
1540 |
-
| 0.4515 | 25550 | 0.2978 |
|
1541 |
-
| 0.4524 | 25600 | 0.3397 |
|
1542 |
-
| 0.4533 | 25650 | 0.4655 |
|
1543 |
-
| 0.4541 | 25700 | 0.2669 |
|
1544 |
-
| 0.4550 | 25750 | 0.3316 |
|
1545 |
-
| 0.4559 | 25800 | 0.3395 |
|
1546 |
-
| 0.4568 | 25850 | 0.3631 |
|
1547 |
-
| 0.4577 | 25900 | 0.3418 |
|
1548 |
-
| 0.4586 | 25950 | 0.3351 |
|
1549 |
-
| 0.4594 | 26000 | 0.3117 |
|
1550 |
-
| 0.4603 | 26050 | 0.3197 |
|
1551 |
-
| 0.4612 | 26100 | 0.2856 |
|
1552 |
-
| 0.4621 | 26150 | 0.3668 |
|
1553 |
-
| 0.4630 | 26200 | 0.4672 |
|
1554 |
-
| 0.4639 | 26250 | 0.373 |
|
1555 |
-
| 0.4647 | 26300 | 0.3232 |
|
1556 |
-
| 0.4656 | 26350 | 0.3461 |
|
1557 |
-
| 0.4665 | 26400 | 0.2937 |
|
1558 |
|
1559 |
-
</details>
|
1560 |
|
1561 |
### Framework Versions
|
1562 |
- Python: 3.11.13
|
|
|
1187 |
</details>
|
1188 |
|
1189 |
### Training Logs
|
|
|
|
|
1190 |
| Epoch | Step | Training Loss |
|
1191 |
|:------:|:-----:|:-------------:|
|
1192 |
+
| 0.4674 | 26450 | 0.3511 |
|
1193 |
+
| 0.4683 | 26500 | 0.3204 |
|
1194 |
+
| 0.4692 | 26550 | 0.2698 |
|
1195 |
+
| 0.4700 | 26600 | 0.3019 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1196 |
|
|
|
1197 |
|
1198 |
### Framework Versions
|
1199 |
- Python: 3.11.13
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 90864192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:efcfe5aa0c5005b2003bf869f963a20c876745fa7f627f40c3819a73de45b137
|
3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 180609210
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e583966410e9fff5f8ac46b24676c49f860c538a27398df242344bd0d2a7d08c
|
3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0bea9f5fcadeb0cd0d9a943463b72fc5766ebc9e57169008c5a18a91f57902fe
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc231b83c50d1c7b06b068e6375671742fac1a054d31705297b772d1dffbf9a2
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:93fabb93104e1056a717edb04db9d7d8955bbdc038b375db1506268b873e6fee
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -3704,6 +3704,34 @@
|
|
3704 |
"learning_rate": 2.9647955076476018e-05,
|
3705 |
"loss": 0.2937,
|
3706 |
"step": 26400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3707 |
}
|
3708 |
],
|
3709 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.4700394055591879,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 26600,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
3704 |
"learning_rate": 2.9647955076476018e-05,
|
3705 |
"loss": 0.2937,
|
3706 |
"step": 26400
|
3707 |
+
},
|
3708 |
+
{
|
3709 |
+
"epoch": 0.4673888074075383,
|
3710 |
+
"grad_norm": 1.3711694478988647,
|
3711 |
+
"learning_rate": 2.9599850778504252e-05,
|
3712 |
+
"loss": 0.3511,
|
3713 |
+
"step": 26450
|
3714 |
+
},
|
3715 |
+
{
|
3716 |
+
"epoch": 0.4682723401247548,
|
3717 |
+
"grad_norm": 3.0807628631591797,
|
3718 |
+
"learning_rate": 2.9550764760165717e-05,
|
3719 |
+
"loss": 0.3204,
|
3720 |
+
"step": 26500
|
3721 |
+
},
|
3722 |
+
{
|
3723 |
+
"epoch": 0.46915587284197136,
|
3724 |
+
"grad_norm": 1.5949090719223022,
|
3725 |
+
"learning_rate": 2.950167874182718e-05,
|
3726 |
+
"loss": 0.2698,
|
3727 |
+
"step": 26550
|
3728 |
+
},
|
3729 |
+
{
|
3730 |
+
"epoch": 0.4700394055591879,
|
3731 |
+
"grad_norm": 1.6748404502868652,
|
3732 |
+
"learning_rate": 2.9452592723488643e-05,
|
3733 |
+
"loss": 0.3019,
|
3734 |
+
"step": 26600
|
3735 |
}
|
3736 |
],
|
3737 |
"logging_steps": 50,
|
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5560
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6fff70b6f1e4863ea80efbdd3030d9dd2b5fc82ab59547db2165691e8a1c93fc
|
3 |
size 5560
|