guyhadad01 committed on
Commit
7051f21
·
verified ·
1 Parent(s): 0a94a39

Training in progress, step 45000, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -1187,236 +1187,13 @@ You can finetune this model on your own dataset.
1187
  </details>
1188
 
1189
  ### Training Logs
1190
- <details><summary>Click to expand</summary>
1191
-
1192
  | Epoch | Step | Training Loss |
1193
  |:------:|:-----:|:-------------:|
1194
- | 0.5946 | 33650 | 0.2952 |
1195
- | 0.5955 | 33700 | 0.2754 |
1196
- | 0.5964 | 33750 | 0.3434 |
1197
- | 0.5973 | 33800 | 0.2541 |
1198
- | 0.5982 | 33850 | 0.2923 |
1199
- | 0.5990 | 33900 | 0.2361 |
1200
- | 0.5999 | 33950 | 0.4459 |
1201
- | 0.6008 | 34000 | 0.3169 |
1202
- | 0.6017 | 34050 | 0.3146 |
1203
- | 0.6026 | 34100 | 0.3125 |
1204
- | 0.6035 | 34150 | 0.2738 |
1205
- | 0.6043 | 34200 | 0.3062 |
1206
- | 0.6052 | 34250 | 0.3566 |
1207
- | 0.6061 | 34300 | 0.3466 |
1208
- | 0.6070 | 34350 | 0.2608 |
1209
- | 0.6079 | 34400 | 0.3239 |
1210
- | 0.6088 | 34450 | 0.3557 |
1211
- | 0.6096 | 34500 | 0.4175 |
1212
- | 0.6105 | 34550 | 0.3104 |
1213
- | 0.6114 | 34600 | 0.3305 |
1214
- | 0.6123 | 34650 | 0.2832 |
1215
- | 0.6132 | 34700 | 0.3007 |
1216
- | 0.6141 | 34750 | 0.3932 |
1217
- | 0.6149 | 34800 | 0.3 |
1218
- | 0.6158 | 34850 | 0.2785 |
1219
- | 0.6167 | 34900 | 0.3015 |
1220
- | 0.6176 | 34950 | 0.3291 |
1221
- | 0.6185 | 35000 | 0.2634 |
1222
- | 0.6194 | 35050 | 0.3212 |
1223
- | 0.6202 | 35100 | 0.2965 |
1224
- | 0.6211 | 35150 | 0.3557 |
1225
- | 0.6220 | 35200 | 0.3535 |
1226
- | 0.6229 | 35250 | 0.4183 |
1227
- | 0.6238 | 35300 | 0.3355 |
1228
- | 0.6247 | 35350 | 0.3626 |
1229
- | 0.6255 | 35400 | 0.2542 |
1230
- | 0.6264 | 35450 | 0.2555 |
1231
- | 0.6273 | 35500 | 0.3418 |
1232
- | 0.6282 | 35550 | 0.3118 |
1233
- | 0.6291 | 35600 | 0.2889 |
1234
- | 0.6300 | 35650 | 0.319 |
1235
- | 0.6308 | 35700 | 0.312 |
1236
- | 0.6317 | 35750 | 0.2776 |
1237
- | 0.6326 | 35800 | 0.2574 |
1238
- | 0.6335 | 35850 | 0.2816 |
1239
- | 0.6344 | 35900 | 0.3392 |
1240
- | 0.6353 | 35950 | 0.2856 |
1241
- | 0.6361 | 36000 | 0.286 |
1242
- | 0.6370 | 36050 | 0.3555 |
1243
- | 0.6379 | 36100 | 0.3065 |
1244
- | 0.6388 | 36150 | 0.3696 |
1245
- | 0.6397 | 36200 | 0.2626 |
1246
- | 0.6406 | 36250 | 0.2681 |
1247
- | 0.6414 | 36300 | 0.3015 |
1248
- | 0.6423 | 36350 | 0.3587 |
1249
- | 0.6432 | 36400 | 0.4138 |
1250
- | 0.6441 | 36450 | 0.3785 |
1251
- | 0.6450 | 36500 | 0.3385 |
1252
- | 0.6459 | 36550 | 0.3423 |
1253
- | 0.6467 | 36600 | 0.3793 |
1254
- | 0.6476 | 36650 | 0.3819 |
1255
- | 0.6485 | 36700 | 0.3111 |
1256
- | 0.6494 | 36750 | 0.3299 |
1257
- | 0.6503 | 36800 | 0.3994 |
1258
- | 0.6512 | 36850 | 0.3595 |
1259
- | 0.6520 | 36900 | 0.3572 |
1260
- | 0.6529 | 36950 | 0.3085 |
1261
- | 0.6538 | 37000 | 0.2647 |
1262
- | 0.6547 | 37050 | 0.2622 |
1263
- | 0.6556 | 37100 | 0.3814 |
1264
- | 0.6565 | 37150 | 0.4174 |
1265
- | 0.6573 | 37200 | 0.3473 |
1266
- | 0.6582 | 37250 | 0.3598 |
1267
- | 0.6591 | 37300 | 0.2822 |
1268
- | 0.6600 | 37350 | 0.2538 |
1269
- | 0.6609 | 37400 | 0.2212 |
1270
- | 0.6618 | 37450 | 0.2687 |
1271
- | 0.6626 | 37500 | 0.3288 |
1272
- | 0.6635 | 37550 | 0.4221 |
1273
- | 0.6644 | 37600 | 0.3711 |
1274
- | 0.6653 | 37650 | 0.2418 |
1275
- | 0.6662 | 37700 | 0.3 |
1276
- | 0.6671 | 37750 | 0.3147 |
1277
- | 0.6680 | 37800 | 0.2895 |
1278
- | 0.6688 | 37850 | 0.2649 |
1279
- | 0.6697 | 37900 | 0.287 |
1280
- | 0.6706 | 37950 | 0.267 |
1281
- | 0.6715 | 38000 | 0.2862 |
1282
- | 0.6724 | 38050 | 0.2931 |
1283
- | 0.6733 | 38100 | 0.3039 |
1284
- | 0.6741 | 38150 | 0.3067 |
1285
- | 0.6750 | 38200 | 0.3072 |
1286
- | 0.6759 | 38250 | 0.2744 |
1287
- | 0.6768 | 38300 | 0.3098 |
1288
- | 0.6777 | 38350 | 0.2837 |
1289
- | 0.6786 | 38400 | 0.2998 |
1290
- | 0.6794 | 38450 | 0.2477 |
1291
- | 0.6803 | 38500 | 0.3343 |
1292
- | 0.6812 | 38550 | 0.2411 |
1293
- | 0.6821 | 38600 | 0.2641 |
1294
- | 0.6830 | 38650 | 0.3673 |
1295
- | 0.6839 | 38700 | 0.3456 |
1296
- | 0.6847 | 38750 | 0.2554 |
1297
- | 0.6856 | 38800 | 0.2289 |
1298
- | 0.6865 | 38850 | 0.2409 |
1299
- | 0.6874 | 38900 | 0.3395 |
1300
- | 0.6883 | 38950 | 0.3296 |
1301
- | 0.6892 | 39000 | 0.349 |
1302
- | 0.6900 | 39050 | 0.2841 |
1303
- | 0.6909 | 39100 | 0.3101 |
1304
- | 0.6918 | 39150 | 0.3094 |
1305
- | 0.6927 | 39200 | 0.2485 |
1306
- | 0.6936 | 39250 | 0.2985 |
1307
- | 0.6945 | 39300 | 0.3039 |
1308
- | 0.6953 | 39350 | 0.2616 |
1309
- | 0.6962 | 39400 | 0.2932 |
1310
- | 0.6971 | 39450 | 0.3707 |
1311
- | 0.6980 | 39500 | 0.2728 |
1312
- | 0.6989 | 39550 | 0.2482 |
1313
- | 0.6998 | 39600 | 0.3475 |
1314
- | 0.7006 | 39650 | 0.3447 |
1315
- | 0.7015 | 39700 | 0.2503 |
1316
- | 0.7024 | 39750 | 0.3593 |
1317
- | 0.7033 | 39800 | 0.2933 |
1318
- | 0.7042 | 39850 | 0.3565 |
1319
- | 0.7051 | 39900 | 0.2838 |
1320
- | 0.7059 | 39950 | 0.2604 |
1321
- | 0.7068 | 40000 | 0.2286 |
1322
- | 0.7077 | 40050 | 0.2766 |
1323
- | 0.7086 | 40100 | 0.2649 |
1324
- | 0.7095 | 40150 | 0.3184 |
1325
- | 0.7104 | 40200 | 0.334 |
1326
- | 0.7112 | 40250 | 0.3609 |
1327
- | 0.7121 | 40300 | 0.306 |
1328
- | 0.7130 | 40350 | 0.2629 |
1329
- | 0.7139 | 40400 | 0.2311 |
1330
- | 0.7148 | 40450 | 0.2909 |
1331
- | 0.7157 | 40500 | 0.343 |
1332
- | 0.7165 | 40550 | 0.3013 |
1333
- | 0.7174 | 40600 | 0.3128 |
1334
- | 0.7183 | 40650 | 0.2073 |
1335
- | 0.7192 | 40700 | 0.2873 |
1336
- | 0.7201 | 40750 | 0.3705 |
1337
- | 0.7210 | 40800 | 0.2985 |
1338
- | 0.7218 | 40850 | 0.3355 |
1339
- | 0.7227 | 40900 | 0.3322 |
1340
- | 0.7236 | 40950 | 0.3424 |
1341
- | 0.7245 | 41000 | 0.2834 |
1342
- | 0.7254 | 41050 | 0.2776 |
1343
- | 0.7263 | 41100 | 0.2924 |
1344
- | 0.7271 | 41150 | 0.3982 |
1345
- | 0.7280 | 41200 | 0.3379 |
1346
- | 0.7289 | 41250 | 0.2876 |
1347
- | 0.7298 | 41300 | 0.3289 |
1348
- | 0.7307 | 41350 | 0.3119 |
1349
- | 0.7316 | 41400 | 0.3007 |
1350
- | 0.7324 | 41450 | 0.3499 |
1351
- | 0.7333 | 41500 | 0.3016 |
1352
- | 0.7342 | 41550 | 0.3232 |
1353
- | 0.7351 | 41600 | 0.2577 |
1354
- | 0.7360 | 41650 | 0.2928 |
1355
- | 0.7369 | 41700 | 0.2814 |
1356
- | 0.7377 | 41750 | 0.2962 |
1357
- | 0.7386 | 41800 | 0.2756 |
1358
- | 0.7395 | 41850 | 0.3287 |
1359
- | 0.7404 | 41900 | 0.34 |
1360
- | 0.7413 | 41950 | 0.3402 |
1361
- | 0.7422 | 42000 | 0.2404 |
1362
- | 0.7431 | 42050 | 0.2655 |
1363
- | 0.7439 | 42100 | 0.3288 |
1364
- | 0.7448 | 42150 | 0.3333 |
1365
- | 0.7457 | 42200 | 0.3602 |
1366
- | 0.7466 | 42250 | 0.3631 |
1367
- | 0.7475 | 42300 | 0.2374 |
1368
- | 0.7484 | 42350 | 0.3347 |
1369
- | 0.7492 | 42400 | 0.3031 |
1370
- | 0.7501 | 42450 | 0.2817 |
1371
- | 0.7510 | 42500 | 0.3433 |
1372
- | 0.7519 | 42550 | 0.3273 |
1373
- | 0.7528 | 42600 | 0.3123 |
1374
- | 0.7537 | 42650 | 0.29 |
1375
- | 0.7545 | 42700 | 0.3352 |
1376
- | 0.7554 | 42750 | 0.3143 |
1377
- | 0.7563 | 42800 | 0.2833 |
1378
- | 0.7572 | 42850 | 0.2851 |
1379
- | 0.7581 | 42900 | 0.2956 |
1380
- | 0.7590 | 42950 | 0.3038 |
1381
- | 0.7598 | 43000 | 0.2195 |
1382
- | 0.7607 | 43050 | 0.2588 |
1383
- | 0.7616 | 43100 | 0.3065 |
1384
- | 0.7625 | 43150 | 0.3252 |
1385
- | 0.7634 | 43200 | 0.2418 |
1386
- | 0.7643 | 43250 | 0.4002 |
1387
- | 0.7651 | 43300 | 0.3379 |
1388
- | 0.7660 | 43350 | 0.3574 |
1389
- | 0.7669 | 43400 | 0.3664 |
1390
- | 0.7678 | 43450 | 0.3274 |
1391
- | 0.7687 | 43500 | 0.2443 |
1392
- | 0.7696 | 43550 | 0.2998 |
1393
- | 0.7704 | 43600 | 0.3619 |
1394
- | 0.7713 | 43650 | 0.2586 |
1395
- | 0.7722 | 43700 | 0.251 |
1396
- | 0.7731 | 43750 | 0.3154 |
1397
- | 0.7740 | 43800 | 0.3309 |
1398
- | 0.7749 | 43850 | 0.2768 |
1399
- | 0.7757 | 43900 | 0.3049 |
1400
- | 0.7766 | 43950 | 0.2939 |
1401
- | 0.7775 | 44000 | 0.2909 |
1402
- | 0.7784 | 44050 | 0.2157 |
1403
- | 0.7793 | 44100 | 0.2785 |
1404
- | 0.7802 | 44150 | 0.2386 |
1405
- | 0.7810 | 44200 | 0.3297 |
1406
- | 0.7819 | 44250 | 0.2576 |
1407
- | 0.7828 | 44300 | 0.2678 |
1408
- | 0.7837 | 44350 | 0.3625 |
1409
- | 0.7846 | 44400 | 0.311 |
1410
- | 0.7855 | 44450 | 0.2924 |
1411
- | 0.7863 | 44500 | 0.2602 |
1412
- | 0.7872 | 44550 | 0.3936 |
1413
- | 0.7881 | 44600 | 0.229 |
1414
- | 0.7890 | 44650 | 0.3017 |
1415
- | 0.7899 | 44700 | 0.2795 |
1416
- | 0.7908 | 44750 | 0.279 |
1417
- | 0.7916 | 44800 | 0.227 |
1418
 
1419
- </details>
1420
 
1421
  ### Framework Versions
1422
  - Python: 3.11.13
 
1187
  </details>
1188
 
1189
  ### Training Logs
 
 
1190
  | Epoch | Step | Training Loss |
1191
  |:------:|:-----:|:-------------:|
1192
+ | 0.7925 | 44850 | 0.25 |
1193
+ | 0.7934 | 44900 | 0.3381 |
1194
+ | 0.7943 | 44950 | 0.2914 |
1195
+ | 0.7952 | 45000 | 0.282 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1196
 
 
1197
 
1198
  ### Framework Versions
1199
  - Python: 3.11.13
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2970aabf3a655ac85113d5b6672798ff4b6430f64aad4045ed8ffd01109ee0a4
3
  size 90864192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ef76b6fee8c0fd9556900a05eb0770041bcc5a9708508b78f491641cac655f8
3
  size 90864192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce8b8c1496f83dcf4b7c667f04fccd67c7d43b788742223114f77d71b8b17910
3
  size 180609210
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb08a5d8f04e486ccde9a85e4e9299751359a9d9614ffc3531e3deafb4e0a876
3
  size 180609210
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb56f2c027efc838c67094a45905db4a06a6d3525086412dec63fe94b6326cc5
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6e372030ae72b97fbc6cc5e639f35bf2df20cbde1f7751bd5565217fb58eec7
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d02bc78b22ccfd2d2cb2a9e32bb7cd76760001fe2e9b8f3babc4091b78fde49
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c78515125dc6571e473b9ee78fd09306ea3ef81a3f639c80068abccc6924eae
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d1b4b34303310b51ebb01ce33c3b39156dd3c7f40c71fbb94511f9c8503c5fa
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8371b7f9012f9b7db42f400823445baf760f1582673ba8da16f0fa77bea3fc99
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.7916453146260006,
6
  "eval_steps": 500,
7
- "global_step": 44800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -6280,6 +6280,34 @@
6280
  "learning_rate": 1.1592154090828769e-05,
6281
  "loss": 0.227,
6282
  "step": 44800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6283
  }
6284
  ],
6285
  "logging_steps": 50,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.7951794454948666,
6
  "eval_steps": 500,
7
+ "global_step": 45000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
6280
  "learning_rate": 1.1592154090828769e-05,
6281
  "loss": 0.227,
6282
  "step": 44800
6283
+ },
6284
+ {
6285
+ "epoch": 0.7925288473432172,
6286
+ "grad_norm": 1.5428054332733154,
6287
+ "learning_rate": 1.1543068072490232e-05,
6288
+ "loss": 0.25,
6289
+ "step": 44850
6290
+ },
6291
+ {
6292
+ "epoch": 0.7934123800604337,
6293
+ "grad_norm": 4.3441057205200195,
6294
+ "learning_rate": 1.1494963774518466e-05,
6295
+ "loss": 0.3381,
6296
+ "step": 44900
6297
+ },
6298
+ {
6299
+ "epoch": 0.7942959127776501,
6300
+ "grad_norm": 1.589739203453064,
6301
+ "learning_rate": 1.1445877756179931e-05,
6302
+ "loss": 0.2914,
6303
+ "step": 44950
6304
+ },
6305
+ {
6306
+ "epoch": 0.7951794454948666,
6307
+ "grad_norm": 1.8874093294143677,
6308
+ "learning_rate": 1.1396791737841392e-05,
6309
+ "loss": 0.282,
6310
+ "step": 45000
6311
  }
6312
  ],
6313
  "logging_steps": 50,
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25a21a534e4993b863994e64d84a120efcce8aac5f212cbacbdb8f1e5edfbb2e
3
  size 5560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b97480b9506272973ef63d3e54eedf906636737574e44c7ac22ad4421393b708
3
  size 5560