guyhadad01 committed on
Commit
c8ee144
·
verified ·
1 Parent(s): d2b9a88

Training in progress, step 26600, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -1187,376 +1187,13 @@ You can finetune this model on your own dataset.
1187
  </details>
1188
 
1189
  ### Training Logs
1190
- <details><summary>Click to expand</summary>
1191
-
1192
  | Epoch | Step | Training Loss |
1193
  |:------:|:-----:|:-------------:|
1194
- | 0.1458 | 8250 | 0.4688 |
1195
- | 0.1467 | 8300 | 0.3967 |
1196
- | 0.1475 | 8350 | 0.4911 |
1197
- | 0.1484 | 8400 | 0.4076 |
1198
- | 0.1493 | 8450 | 0.398 |
1199
- | 0.1502 | 8500 | 0.4203 |
1200
- | 0.1511 | 8550 | 0.414 |
1201
- | 0.1520 | 8600 | 0.3436 |
1202
- | 0.1529 | 8650 | 0.436 |
1203
- | 0.1537 | 8700 | 0.3761 |
1204
- | 0.1546 | 8750 | 0.5579 |
1205
- | 0.1555 | 8800 | 0.441 |
1206
- | 0.1564 | 8850 | 0.5877 |
1207
- | 0.1573 | 8900 | 0.4081 |
1208
- | 0.1582 | 8950 | 0.4648 |
1209
- | 0.1590 | 9000 | 0.4321 |
1210
- | 0.1599 | 9050 | 0.4226 |
1211
- | 0.1608 | 9100 | 0.3634 |
1212
- | 0.1617 | 9150 | 0.4252 |
1213
- | 0.1626 | 9200 | 0.3899 |
1214
- | 0.1635 | 9250 | 0.4335 |
1215
- | 0.1643 | 9300 | 0.4204 |
1216
- | 0.1652 | 9350 | 0.5576 |
1217
- | 0.1661 | 9400 | 0.4712 |
1218
- | 0.1670 | 9450 | 0.366 |
1219
- | 0.1679 | 9500 | 0.3932 |
1220
- | 0.1688 | 9550 | 0.4836 |
1221
- | 0.1696 | 9600 | 0.3989 |
1222
- | 0.1705 | 9650 | 0.4025 |
1223
- | 0.1714 | 9700 | 0.467 |
1224
- | 0.1723 | 9750 | 0.3558 |
1225
- | 0.1732 | 9800 | 0.3623 |
1226
- | 0.1741 | 9850 | 0.3438 |
1227
- | 0.1749 | 9900 | 0.4193 |
1228
- | 0.1758 | 9950 | 0.3173 |
1229
- | 0.1767 | 10000 | 0.4569 |
1230
- | 0.1776 | 10050 | 0.4538 |
1231
- | 0.1785 | 10100 | 0.4422 |
1232
- | 0.1794 | 10150 | 0.3747 |
1233
- | 0.1802 | 10200 | 0.3989 |
1234
- | 0.1811 | 10250 | 0.5315 |
1235
- | 0.1820 | 10300 | 0.3565 |
1236
- | 0.1829 | 10350 | 0.4409 |
1237
- | 0.1838 | 10400 | 0.3762 |
1238
- | 0.1847 | 10450 | 0.4571 |
1239
- | 0.1855 | 10500 | 0.3561 |
1240
- | 0.1864 | 10550 | 0.4595 |
1241
- | 0.1873 | 10600 | 0.4576 |
1242
- | 0.1882 | 10650 | 0.4957 |
1243
- | 0.1891 | 10700 | 0.3533 |
1244
- | 0.1900 | 10750 | 0.5213 |
1245
- | 0.1908 | 10800 | 0.4372 |
1246
- | 0.1917 | 10850 | 0.3286 |
1247
- | 0.1926 | 10900 | 0.4082 |
1248
- | 0.1935 | 10950 | 0.4056 |
1249
- | 0.1944 | 11000 | 0.4435 |
1250
- | 0.1953 | 11050 | 0.4272 |
1251
- | 0.1961 | 11100 | 0.4334 |
1252
- | 0.1970 | 11150 | 0.4479 |
1253
- | 0.1979 | 11200 | 0.3545 |
1254
- | 0.1988 | 11250 | 0.3192 |
1255
- | 0.1997 | 11300 | 0.3883 |
1256
- | 0.2006 | 11350 | 0.3312 |
1257
- | 0.2014 | 11400 | 0.4888 |
1258
- | 0.2023 | 11450 | 0.5102 |
1259
- | 0.2032 | 11500 | 0.4133 |
1260
- | 0.2041 | 11550 | 0.4255 |
1261
- | 0.2050 | 11600 | 0.3766 |
1262
- | 0.2059 | 11650 | 0.3651 |
1263
- | 0.2067 | 11700 | 0.4562 |
1264
- | 0.2076 | 11750 | 0.3946 |
1265
- | 0.2085 | 11800 | 0.4075 |
1266
- | 0.2094 | 11850 | 0.4304 |
1267
- | 0.2103 | 11900 | 0.3404 |
1268
- | 0.2112 | 11950 | 0.4013 |
1269
- | 0.2120 | 12000 | 0.4278 |
1270
- | 0.2129 | 12050 | 0.3905 |
1271
- | 0.2138 | 12100 | 0.4082 |
1272
- | 0.2147 | 12150 | 0.3855 |
1273
- | 0.2156 | 12200 | 0.4567 |
1274
- | 0.2165 | 12250 | 0.4368 |
1275
- | 0.2173 | 12300 | 0.3686 |
1276
- | 0.2182 | 12350 | 0.4163 |
1277
- | 0.2191 | 12400 | 0.3595 |
1278
- | 0.2200 | 12450 | 0.3326 |
1279
- | 0.2209 | 12500 | 0.3775 |
1280
- | 0.2218 | 12550 | 0.3695 |
1281
- | 0.2227 | 12600 | 0.3545 |
1282
- | 0.2235 | 12650 | 0.3548 |
1283
- | 0.2244 | 12700 | 0.4847 |
1284
- | 0.2253 | 12750 | 0.4 |
1285
- | 0.2262 | 12800 | 0.4755 |
1286
- | 0.2271 | 12850 | 0.3399 |
1287
- | 0.2280 | 12900 | 0.3297 |
1288
- | 0.2288 | 12950 | 0.4071 |
1289
- | 0.2297 | 13000 | 0.4069 |
1290
- | 0.2306 | 13050 | 0.5156 |
1291
- | 0.2315 | 13100 | 0.4359 |
1292
- | 0.2324 | 13150 | 0.3702 |
1293
- | 0.2333 | 13200 | 0.5026 |
1294
- | 0.2341 | 13250 | 0.5201 |
1295
- | 0.2350 | 13300 | 0.3857 |
1296
- | 0.2359 | 13350 | 0.3555 |
1297
- | 0.2368 | 13400 | 0.381 |
1298
- | 0.2377 | 13450 | 0.3804 |
1299
- | 0.2386 | 13500 | 0.3666 |
1300
- | 0.2394 | 13550 | 0.3792 |
1301
- | 0.2403 | 13600 | 0.3341 |
1302
- | 0.2412 | 13650 | 0.4906 |
1303
- | 0.2421 | 13700 | 0.541 |
1304
- | 0.2430 | 13750 | 0.3936 |
1305
- | 0.2439 | 13800 | 0.3871 |
1306
- | 0.2447 | 13850 | 0.2983 |
1307
- | 0.2456 | 13900 | 0.4071 |
1308
- | 0.2465 | 13950 | 0.3434 |
1309
- | 0.2474 | 14000 | 0.3885 |
1310
- | 0.2483 | 14050 | 0.4296 |
1311
- | 0.2492 | 14100 | 0.3853 |
1312
- | 0.2500 | 14150 | 0.4068 |
1313
- | 0.2509 | 14200 | 0.4071 |
1314
- | 0.2518 | 14250 | 0.3588 |
1315
- | 0.2527 | 14300 | 0.4161 |
1316
- | 0.2536 | 14350 | 0.3431 |
1317
- | 0.2545 | 14400 | 0.3576 |
1318
- | 0.2553 | 14450 | 0.3563 |
1319
- | 0.2562 | 14500 | 0.3406 |
1320
- | 0.2571 | 14550 | 0.4397 |
1321
- | 0.2580 | 14600 | 0.411 |
1322
- | 0.2589 | 14650 | 0.3742 |
1323
- | 0.2598 | 14700 | 0.3622 |
1324
- | 0.2606 | 14750 | 0.2782 |
1325
- | 0.2615 | 14800 | 0.36 |
1326
- | 0.2624 | 14850 | 0.486 |
1327
- | 0.2633 | 14900 | 0.406 |
1328
- | 0.2642 | 14950 | 0.357 |
1329
- | 0.2651 | 15000 | 0.2855 |
1330
- | 0.2659 | 15050 | 0.4424 |
1331
- | 0.2668 | 15100 | 0.352 |
1332
- | 0.2677 | 15150 | 0.3638 |
1333
- | 0.2686 | 15200 | 0.3727 |
1334
- | 0.2695 | 15250 | 0.3842 |
1335
- | 0.2704 | 15300 | 0.3625 |
1336
- | 0.2712 | 15350 | 0.4088 |
1337
- | 0.2721 | 15400 | 0.4126 |
1338
- | 0.2730 | 15450 | 0.4662 |
1339
- | 0.2739 | 15500 | 0.3889 |
1340
- | 0.2748 | 15550 | 0.3618 |
1341
- | 0.2757 | 15600 | 0.4126 |
1342
- | 0.2765 | 15650 | 0.3771 |
1343
- | 0.2774 | 15700 | 0.4377 |
1344
- | 0.2783 | 15750 | 0.4041 |
1345
- | 0.2792 | 15800 | 0.375 |
1346
- | 0.2801 | 15850 | 0.3339 |
1347
- | 0.2810 | 15900 | 0.348 |
1348
- | 0.2818 | 15950 | 0.367 |
1349
- | 0.2827 | 16000 | 0.3427 |
1350
- | 0.2836 | 16050 | 0.3637 |
1351
- | 0.2845 | 16100 | 0.3489 |
1352
- | 0.2854 | 16150 | 0.4209 |
1353
- | 0.2863 | 16200 | 0.2984 |
1354
- | 0.2871 | 16250 | 0.3877 |
1355
- | 0.2880 | 16300 | 0.3508 |
1356
- | 0.2889 | 16350 | 0.3443 |
1357
- | 0.2898 | 16400 | 0.4346 |
1358
- | 0.2907 | 16450 | 0.4262 |
1359
- | 0.2916 | 16500 | 0.3065 |
1360
- | 0.2924 | 16550 | 0.4003 |
1361
- | 0.2933 | 16600 | 0.5295 |
1362
- | 0.2942 | 16650 | 0.3582 |
1363
- | 0.2951 | 16700 | 0.3299 |
1364
- | 0.2960 | 16750 | 0.3493 |
1365
- | 0.2969 | 16800 | 0.2752 |
1366
- | 0.2978 | 16850 | 0.3254 |
1367
- | 0.2986 | 16900 | 0.3352 |
1368
- | 0.2995 | 16950 | 0.3812 |
1369
- | 0.3004 | 17000 | 0.3332 |
1370
- | 0.3013 | 17050 | 0.3472 |
1371
- | 0.3022 | 17100 | 0.4207 |
1372
- | 0.3031 | 17150 | 0.3849 |
1373
- | 0.3039 | 17200 | 0.3272 |
1374
- | 0.3048 | 17250 | 0.3037 |
1375
- | 0.3057 | 17300 | 0.2924 |
1376
- | 0.3066 | 17350 | 0.4029 |
1377
- | 0.3075 | 17400 | 0.5016 |
1378
- | 0.3084 | 17450 | 0.3648 |
1379
- | 0.3092 | 17500 | 0.3045 |
1380
- | 0.3101 | 17550 | 0.3271 |
1381
- | 0.3110 | 17600 | 0.519 |
1382
- | 0.3119 | 17650 | 0.341 |
1383
- | 0.3128 | 17700 | 0.3352 |
1384
- | 0.3137 | 17750 | 0.4406 |
1385
- | 0.3145 | 17800 | 0.3822 |
1386
- | 0.3154 | 17850 | 0.287 |
1387
- | 0.3163 | 17900 | 0.4349 |
1388
- | 0.3172 | 17950 | 0.3633 |
1389
- | 0.3181 | 18000 | 0.3431 |
1390
- | 0.3190 | 18050 | 0.3986 |
1391
- | 0.3198 | 18100 | 0.3279 |
1392
- | 0.3207 | 18150 | 0.3062 |
1393
- | 0.3216 | 18200 | 0.2973 |
1394
- | 0.3225 | 18250 | 0.4078 |
1395
- | 0.3234 | 18300 | 0.31 |
1396
- | 0.3243 | 18350 | 0.306 |
1397
- | 0.3251 | 18400 | 0.3426 |
1398
- | 0.3260 | 18450 | 0.2807 |
1399
- | 0.3269 | 18500 | 0.3856 |
1400
- | 0.3278 | 18550 | 0.3575 |
1401
- | 0.3287 | 18600 | 0.347 |
1402
- | 0.3296 | 18650 | 0.3195 |
1403
- | 0.3304 | 18700 | 0.3543 |
1404
- | 0.3313 | 18750 | 0.3642 |
1405
- | 0.3322 | 18800 | 0.3415 |
1406
- | 0.3331 | 18850 | 0.3069 |
1407
- | 0.3340 | 18900 | 0.2962 |
1408
- | 0.3349 | 18950 | 0.3156 |
1409
- | 0.3357 | 19000 | 0.3791 |
1410
- | 0.3366 | 19050 | 0.3287 |
1411
- | 0.3375 | 19100 | 0.4034 |
1412
- | 0.3384 | 19150 | 0.3476 |
1413
- | 0.3393 | 19200 | 0.363 |
1414
- | 0.3402 | 19250 | 0.3182 |
1415
- | 0.3410 | 19300 | 0.3238 |
1416
- | 0.3419 | 19350 | 0.3043 |
1417
- | 0.3428 | 19400 | 0.3902 |
1418
- | 0.3437 | 19450 | 0.3085 |
1419
- | 0.3446 | 19500 | 0.3162 |
1420
- | 0.3455 | 19550 | 0.345 |
1421
- | 0.3463 | 19600 | 0.3017 |
1422
- | 0.3472 | 19650 | 0.4232 |
1423
- | 0.3481 | 19700 | 0.3363 |
1424
- | 0.3490 | 19750 | 0.3484 |
1425
- | 0.3499 | 19800 | 0.3719 |
1426
- | 0.3508 | 19850 | 0.3226 |
1427
- | 0.3516 | 19900 | 0.3636 |
1428
- | 0.3525 | 19950 | 0.3704 |
1429
- | 0.3534 | 20000 | 0.3459 |
1430
- | 0.3543 | 20050 | 0.3226 |
1431
- | 0.3552 | 20100 | 0.3256 |
1432
- | 0.3561 | 20150 | 0.3179 |
1433
- | 0.3569 | 20200 | 0.4226 |
1434
- | 0.3578 | 20250 | 0.4196 |
1435
- | 0.3587 | 20300 | 0.3618 |
1436
- | 0.3596 | 20350 | 0.4093 |
1437
- | 0.3605 | 20400 | 0.3051 |
1438
- | 0.3614 | 20450 | 0.3379 |
1439
- | 0.3622 | 20500 | 0.3582 |
1440
- | 0.3631 | 20550 | 0.4004 |
1441
- | 0.3640 | 20600 | 0.33 |
1442
- | 0.3649 | 20650 | 0.3072 |
1443
- | 0.3658 | 20700 | 0.35 |
1444
- | 0.3667 | 20750 | 0.3241 |
1445
- | 0.3675 | 20800 | 0.37 |
1446
- | 0.3684 | 20850 | 0.2689 |
1447
- | 0.3693 | 20900 | 0.4061 |
1448
- | 0.3702 | 20950 | 0.3412 |
1449
- | 0.3711 | 21000 | 0.3619 |
1450
- | 0.3720 | 21050 | 0.314 |
1451
- | 0.3729 | 21100 | 0.2912 |
1452
- | 0.3737 | 21150 | 0.3402 |
1453
- | 0.3746 | 21200 | 0.3462 |
1454
- | 0.3755 | 21250 | 0.2775 |
1455
- | 0.3764 | 21300 | 0.3825 |
1456
- | 0.3773 | 21350 | 0.3931 |
1457
- | 0.3782 | 21400 | 0.4059 |
1458
- | 0.3790 | 21450 | 0.3264 |
1459
- | 0.3799 | 21500 | 0.2605 |
1460
- | 0.3808 | 21550 | 0.3658 |
1461
- | 0.3817 | 21600 | 0.3274 |
1462
- | 0.3826 | 21650 | 0.3094 |
1463
- | 0.3835 | 21700 | 0.3556 |
1464
- | 0.3843 | 21750 | 0.2877 |
1465
- | 0.3852 | 21800 | 0.3203 |
1466
- | 0.3861 | 21850 | 0.2553 |
1467
- | 0.3870 | 21900 | 0.4425 |
1468
- | 0.3879 | 21950 | 0.3249 |
1469
- | 0.3888 | 22000 | 0.3649 |
1470
- | 0.3896 | 22050 | 0.3621 |
1471
- | 0.3905 | 22100 | 0.4232 |
1472
- | 0.3914 | 22150 | 0.266 |
1473
- | 0.3923 | 22200 | 0.3767 |
1474
- | 0.3932 | 22250 | 0.366 |
1475
- | 0.3941 | 22300 | 0.3211 |
1476
- | 0.3949 | 22350 | 0.2505 |
1477
- | 0.3958 | 22400 | 0.3824 |
1478
- | 0.3967 | 22450 | 0.331 |
1479
- | 0.3976 | 22500 | 0.3136 |
1480
- | 0.3985 | 22550 | 0.297 |
1481
- | 0.3994 | 22600 | 0.2933 |
1482
- | 0.4002 | 22650 | 0.4026 |
1483
- | 0.4011 | 22700 | 0.3567 |
1484
- | 0.4020 | 22750 | 0.3711 |
1485
- | 0.4029 | 22800 | 0.378 |
1486
- | 0.4038 | 22850 | 0.355 |
1487
- | 0.4047 | 22900 | 0.3044 |
1488
- | 0.4055 | 22950 | 0.357 |
1489
- | 0.4064 | 23000 | 0.3616 |
1490
- | 0.4073 | 23050 | 0.3139 |
1491
- | 0.4082 | 23100 | 0.3474 |
1492
- | 0.4091 | 23150 | 0.3208 |
1493
- | 0.4100 | 23200 | 0.3798 |
1494
- | 0.4108 | 23250 | 0.3282 |
1495
- | 0.4117 | 23300 | 0.3302 |
1496
- | 0.4126 | 23350 | 0.3599 |
1497
- | 0.4135 | 23400 | 0.3608 |
1498
- | 0.4144 | 23450 | 0.3387 |
1499
- | 0.4153 | 23500 | 0.3987 |
1500
- | 0.4161 | 23550 | 0.3387 |
1501
- | 0.4170 | 23600 | 0.2989 |
1502
- | 0.4179 | 23650 | 0.2629 |
1503
- | 0.4188 | 23700 | 0.2905 |
1504
- | 0.4197 | 23750 | 0.3234 |
1505
- | 0.4206 | 23800 | 0.3063 |
1506
- | 0.4214 | 23850 | 0.3386 |
1507
- | 0.4223 | 23900 | 0.3431 |
1508
- | 0.4232 | 23950 | 0.2902 |
1509
- | 0.4241 | 24000 | 0.3136 |
1510
- | 0.4250 | 24050 | 0.4001 |
1511
- | 0.4259 | 24100 | 0.285 |
1512
- | 0.4267 | 24150 | 0.4717 |
1513
- | 0.4276 | 24200 | 0.3391 |
1514
- | 0.4285 | 24250 | 0.2913 |
1515
- | 0.4294 | 24300 | 0.3417 |
1516
- | 0.4303 | 24350 | 0.2909 |
1517
- | 0.4312 | 24400 | 0.2871 |
1518
- | 0.4320 | 24450 | 0.3929 |
1519
- | 0.4329 | 24500 | 0.278 |
1520
- | 0.4338 | 24550 | 0.4386 |
1521
- | 0.4347 | 24600 | 0.4193 |
1522
- | 0.4356 | 24650 | 0.367 |
1523
- | 0.4365 | 24700 | 0.3474 |
1524
- | 0.4373 | 24750 | 0.3116 |
1525
- | 0.4382 | 24800 | 0.2733 |
1526
- | 0.4391 | 24850 | 0.3046 |
1527
- | 0.4400 | 24900 | 0.3627 |
1528
- | 0.4409 | 24950 | 0.3403 |
1529
- | 0.4418 | 25000 | 0.349 |
1530
- | 0.4426 | 25050 | 0.3527 |
1531
- | 0.4435 | 25100 | 0.3773 |
1532
- | 0.4444 | 25150 | 0.3372 |
1533
- | 0.4453 | 25200 | 0.3178 |
1534
- | 0.4462 | 25250 | 0.2745 |
1535
- | 0.4471 | 25300 | 0.2773 |
1536
- | 0.4480 | 25350 | 0.3822 |
1537
- | 0.4488 | 25400 | 0.3851 |
1538
- | 0.4497 | 25450 | 0.3805 |
1539
- | 0.4506 | 25500 | 0.3245 |
1540
- | 0.4515 | 25550 | 0.2978 |
1541
- | 0.4524 | 25600 | 0.3397 |
1542
- | 0.4533 | 25650 | 0.4655 |
1543
- | 0.4541 | 25700 | 0.2669 |
1544
- | 0.4550 | 25750 | 0.3316 |
1545
- | 0.4559 | 25800 | 0.3395 |
1546
- | 0.4568 | 25850 | 0.3631 |
1547
- | 0.4577 | 25900 | 0.3418 |
1548
- | 0.4586 | 25950 | 0.3351 |
1549
- | 0.4594 | 26000 | 0.3117 |
1550
- | 0.4603 | 26050 | 0.3197 |
1551
- | 0.4612 | 26100 | 0.2856 |
1552
- | 0.4621 | 26150 | 0.3668 |
1553
- | 0.4630 | 26200 | 0.4672 |
1554
- | 0.4639 | 26250 | 0.373 |
1555
- | 0.4647 | 26300 | 0.3232 |
1556
- | 0.4656 | 26350 | 0.3461 |
1557
- | 0.4665 | 26400 | 0.2937 |
1558
 
1559
- </details>
1560
 
1561
  ### Framework Versions
1562
  - Python: 3.11.13
 
1187
  </details>
1188
 
1189
  ### Training Logs
 
 
1190
  | Epoch | Step | Training Loss |
1191
  |:------:|:-----:|:-------------:|
1192
+ | 0.4674 | 26450 | 0.3511 |
1193
+ | 0.4683 | 26500 | 0.3204 |
1194
+ | 0.4692 | 26550 | 0.2698 |
1195
+ | 0.4700 | 26600 | 0.3019 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1196
 
 
1197
 
1198
  ### Framework Versions
1199
  - Python: 3.11.13
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95db985db084a74db34108a18955cb2f94475e94d71ba1394dc6711c0773d4b7
3
  size 90864192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efcfe5aa0c5005b2003bf869f963a20c876745fa7f627f40c3819a73de45b137
3
  size 90864192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99edc411ece53670a76daae12b850cb27d05b6b28f0e1abb1039884ae4ec4d91
3
  size 180609210
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e583966410e9fff5f8ac46b24676c49f860c538a27398df242344bd0d2a7d08c
3
  size 180609210
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3c74149b9485dd85a90bddde2fb89338979661fdbddb3f61abad71930d1b347
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bea9f5fcadeb0cd0d9a943463b72fc5766ebc9e57169008c5a18a91f57902fe
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5aa35335d6835971914c8cd5a825891507e75318afb8a7d46177ecba5e7044a7
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc231b83c50d1c7b06b068e6375671742fac1a054d31705297b772d1dffbf9a2
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8741bd796701c3df080c6afd4a635df632277d4edf7423e34ebd4e6d1a562aa
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93fabb93104e1056a717edb04db9d7d8955bbdc038b375db1506268b873e6fee
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.46650527469032177,
6
  "eval_steps": 500,
7
- "global_step": 26400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3704,6 +3704,34 @@
3704
  "learning_rate": 2.9647955076476018e-05,
3705
  "loss": 0.2937,
3706
  "step": 26400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3707
  }
3708
  ],
3709
  "logging_steps": 50,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.4700394055591879,
6
  "eval_steps": 500,
7
+ "global_step": 26600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3704
  "learning_rate": 2.9647955076476018e-05,
3705
  "loss": 0.2937,
3706
  "step": 26400
3707
+ },
3708
+ {
3709
+ "epoch": 0.4673888074075383,
3710
+ "grad_norm": 1.3711694478988647,
3711
+ "learning_rate": 2.9599850778504252e-05,
3712
+ "loss": 0.3511,
3713
+ "step": 26450
3714
+ },
3715
+ {
3716
+ "epoch": 0.4682723401247548,
3717
+ "grad_norm": 3.0807628631591797,
3718
+ "learning_rate": 2.9550764760165717e-05,
3719
+ "loss": 0.3204,
3720
+ "step": 26500
3721
+ },
3722
+ {
3723
+ "epoch": 0.46915587284197136,
3724
+ "grad_norm": 1.5949090719223022,
3725
+ "learning_rate": 2.950167874182718e-05,
3726
+ "loss": 0.2698,
3727
+ "step": 26550
3728
+ },
3729
+ {
3730
+ "epoch": 0.4700394055591879,
3731
+ "grad_norm": 1.6748404502868652,
3732
+ "learning_rate": 2.9452592723488643e-05,
3733
+ "loss": 0.3019,
3734
+ "step": 26600
3735
  }
3736
  ],
3737
  "logging_steps": 50,
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:764686e45491e3136468635b2d7d30b214689849603924e41f88b81ff0c3a1d0
3
  size 5560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fff70b6f1e4863ea80efbdd3030d9dd2b5fc82ab59547db2165691e8a1c93fc
3
  size 5560