guyhadad01 commited on
Commit
d92ddfb
·
verified ·
1 Parent(s): 5e806ee

Training in progress, step 33800, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -1187,156 +1187,13 @@ You can finetune this model on your own dataset.
1187
  </details>
1188
 
1189
  ### Training Logs
1190
- <details><summary>Click to expand</summary>
1191
-
1192
  | Epoch | Step | Training Loss |
1193
  |:------:|:-----:|:-------------:|
1194
- | 0.4674 | 26450 | 0.3511 |
1195
- | 0.4683 | 26500 | 0.3204 |
1196
- | 0.4692 | 26550 | 0.2698 |
1197
- | 0.4700 | 26600 | 0.3019 |
1198
- | 0.4709 | 26650 | 0.276 |
1199
- | 0.4718 | 26700 | 0.2572 |
1200
- | 0.4727 | 26750 | 0.2821 |
1201
- | 0.4736 | 26800 | 0.2753 |
1202
- | 0.4745 | 26850 | 0.2789 |
1203
- | 0.4753 | 26900 | 0.5175 |
1204
- | 0.4762 | 26950 | 0.3398 |
1205
- | 0.4771 | 27000 | 0.3683 |
1206
- | 0.4780 | 27050 | 0.341 |
1207
- | 0.4789 | 27100 | 0.2753 |
1208
- | 0.4798 | 27150 | 0.35 |
1209
- | 0.4806 | 27200 | 0.3143 |
1210
- | 0.4815 | 27250 | 0.3968 |
1211
- | 0.4824 | 27300 | 0.3246 |
1212
- | 0.4833 | 27350 | 0.3131 |
1213
- | 0.4842 | 27400 | 0.3078 |
1214
- | 0.4851 | 27450 | 0.419 |
1215
- | 0.4859 | 27500 | 0.3959 |
1216
- | 0.4868 | 27550 | 0.3754 |
1217
- | 0.4877 | 27600 | 0.3163 |
1218
- | 0.4886 | 27650 | 0.35 |
1219
- | 0.4895 | 27700 | 0.3397 |
1220
- | 0.4904 | 27750 | 0.3853 |
1221
- | 0.4912 | 27800 | 0.2568 |
1222
- | 0.4921 | 27850 | 0.3108 |
1223
- | 0.4930 | 27900 | 0.4098 |
1224
- | 0.4939 | 27950 | 0.3364 |
1225
- | 0.4948 | 28000 | 0.3354 |
1226
- | 0.4957 | 28050 | 0.2879 |
1227
- | 0.4965 | 28100 | 0.3604 |
1228
- | 0.4974 | 28150 | 0.2612 |
1229
- | 0.4983 | 28200 | 0.3593 |
1230
- | 0.4992 | 28250 | 0.2961 |
1231
- | 0.5001 | 28300 | 0.3338 |
1232
- | 0.5010 | 28350 | 0.3109 |
1233
- | 0.5018 | 28400 | 0.3234 |
1234
- | 0.5027 | 28450 | 0.3257 |
1235
- | 0.5036 | 28500 | 0.4675 |
1236
- | 0.5045 | 28550 | 0.4318 |
1237
- | 0.5054 | 28600 | 0.3594 |
1238
- | 0.5063 | 28650 | 0.3214 |
1239
- | 0.5071 | 28700 | 0.2856 |
1240
- | 0.5080 | 28750 | 0.3094 |
1241
- | 0.5089 | 28800 | 0.3933 |
1242
- | 0.5098 | 28850 | 0.3432 |
1243
- | 0.5107 | 28900 | 0.3766 |
1244
- | 0.5116 | 28950 | 0.3308 |
1245
- | 0.5124 | 29000 | 0.3453 |
1246
- | 0.5133 | 29050 | 0.2904 |
1247
- | 0.5142 | 29100 | 0.2647 |
1248
- | 0.5151 | 29150 | 0.4395 |
1249
- | 0.5160 | 29200 | 0.295 |
1250
- | 0.5169 | 29250 | 0.3927 |
1251
- | 0.5178 | 29300 | 0.3492 |
1252
- | 0.5186 | 29350 | 0.3304 |
1253
- | 0.5195 | 29400 | 0.3557 |
1254
- | 0.5204 | 29450 | 0.3389 |
1255
- | 0.5213 | 29500 | 0.3322 |
1256
- | 0.5222 | 29550 | 0.3053 |
1257
- | 0.5231 | 29600 | 0.2486 |
1258
- | 0.5239 | 29650 | 0.282 |
1259
- | 0.5248 | 29700 | 0.3791 |
1260
- | 0.5257 | 29750 | 0.3346 |
1261
- | 0.5266 | 29800 | 0.2743 |
1262
- | 0.5275 | 29850 | 0.2927 |
1263
- | 0.5284 | 29900 | 0.3775 |
1264
- | 0.5292 | 29950 | 0.3114 |
1265
- | 0.5301 | 30000 | 0.2383 |
1266
- | 0.5310 | 30050 | 0.3798 |
1267
- | 0.5319 | 30100 | 0.3204 |
1268
- | 0.5328 | 30150 | 0.2496 |
1269
- | 0.5337 | 30200 | 0.4147 |
1270
- | 0.5345 | 30250 | 0.3021 |
1271
- | 0.5354 | 30300 | 0.2758 |
1272
- | 0.5363 | 30350 | 0.3166 |
1273
- | 0.5372 | 30400 | 0.35 |
1274
- | 0.5381 | 30450 | 0.3391 |
1275
- | 0.5390 | 30500 | 0.3576 |
1276
- | 0.5398 | 30550 | 0.295 |
1277
- | 0.5407 | 30600 | 0.3449 |
1278
- | 0.5416 | 30650 | 0.3274 |
1279
- | 0.5425 | 30700 | 0.3094 |
1280
- | 0.5434 | 30750 | 0.3077 |
1281
- | 0.5443 | 30800 | 0.3505 |
1282
- | 0.5451 | 30850 | 0.3485 |
1283
- | 0.5460 | 30900 | 0.331 |
1284
- | 0.5469 | 30950 | 0.2846 |
1285
- | 0.5478 | 31000 | 0.3647 |
1286
- | 0.5487 | 31050 | 0.3475 |
1287
- | 0.5496 | 31100 | 0.2833 |
1288
- | 0.5504 | 31150 | 0.3 |
1289
- | 0.5513 | 31200 | 0.3568 |
1290
- | 0.5522 | 31250 | 0.3268 |
1291
- | 0.5531 | 31300 | 0.4005 |
1292
- | 0.5540 | 31350 | 0.2993 |
1293
- | 0.5549 | 31400 | 0.3463 |
1294
- | 0.5557 | 31450 | 0.3654 |
1295
- | 0.5566 | 31500 | 0.3329 |
1296
- | 0.5575 | 31550 | 0.2794 |
1297
- | 0.5584 | 31600 | 0.4189 |
1298
- | 0.5593 | 31650 | 0.3643 |
1299
- | 0.5602 | 31700 | 0.3578 |
1300
- | 0.5610 | 31750 | 0.3193 |
1301
- | 0.5619 | 31800 | 0.327 |
1302
- | 0.5628 | 31850 | 0.3429 |
1303
- | 0.5637 | 31900 | 0.2994 |
1304
- | 0.5646 | 31950 | 0.3219 |
1305
- | 0.5655 | 32000 | 0.2902 |
1306
- | 0.5663 | 32050 | 0.3896 |
1307
- | 0.5672 | 32100 | 0.2491 |
1308
- | 0.5681 | 32150 | 0.2663 |
1309
- | 0.5690 | 32200 | 0.3433 |
1310
- | 0.5699 | 32250 | 0.3375 |
1311
- | 0.5708 | 32300 | 0.2891 |
1312
- | 0.5716 | 32350 | 0.296 |
1313
- | 0.5725 | 32400 | 0.2478 |
1314
- | 0.5734 | 32450 | 0.3514 |
1315
- | 0.5743 | 32500 | 0.2741 |
1316
- | 0.5752 | 32550 | 0.3546 |
1317
- | 0.5761 | 32600 | 0.3927 |
1318
- | 0.5769 | 32650 | 0.2725 |
1319
- | 0.5778 | 32700 | 0.3167 |
1320
- | 0.5787 | 32750 | 0.3249 |
1321
- | 0.5796 | 32800 | 0.2443 |
1322
- | 0.5805 | 32850 | 0.4113 |
1323
- | 0.5814 | 32900 | 0.3106 |
1324
- | 0.5822 | 32950 | 0.2841 |
1325
- | 0.5831 | 33000 | 0.2786 |
1326
- | 0.5840 | 33050 | 0.3576 |
1327
- | 0.5849 | 33100 | 0.2475 |
1328
- | 0.5858 | 33150 | 0.348 |
1329
- | 0.5867 | 33200 | 0.2779 |
1330
- | 0.5875 | 33250 | 0.3166 |
1331
- | 0.5884 | 33300 | 0.3448 |
1332
- | 0.5893 | 33350 | 0.2409 |
1333
- | 0.5902 | 33400 | 0.3313 |
1334
- | 0.5911 | 33450 | 0.2981 |
1335
- | 0.5920 | 33500 | 0.269 |
1336
- | 0.5929 | 33550 | 0.4098 |
1337
- | 0.5937 | 33600 | 0.2924 |
1338
 
1339
- </details>
1340
 
1341
  ### Framework Versions
1342
  - Python: 3.11.13
 
1187
  </details>
1188
 
1189
  ### Training Logs
 
 
1190
  | Epoch | Step | Training Loss |
1191
  |:------:|:-----:|:-------------:|
1192
+ | 0.5946 | 33650 | 0.2952 |
1193
+ | 0.5955 | 33700 | 0.2754 |
1194
+ | 0.5964 | 33750 | 0.3434 |
1195
+ | 0.5973 | 33800 | 0.2541 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1196
 
 
1197
 
1198
  ### Framework Versions
1199
  - Python: 3.11.13
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f534df9eabe25064d029fb2d1fd136283ef967f7afc887a51be26077e972e34
3
  size 90864192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f44c393193a7debcaa9fe116b2e33229c62b31c7228329c28ad0491e8701e1a
3
  size 90864192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b9243475a6ea2e44bb15f370f31b09dd572fa23757f65fe3e9261c3c93a8462
3
  size 180609210
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:620d6a0f0e8b3c0b610d1a9d8d426a0e427c22a7ec7ed38356be652403968d4e
3
  size 180609210
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd6945cb5a30962738a328e18b0f7ded3e18c1657315150f6cef20738ac2b26e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ba676fcdff1c56d78801dd51dede93231c1d56645e7877743bcba848aec097a
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca24b530a082040eb648f26b5b54b5a0f292c3ed7dea28d1bd9dbe2dcf563558
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b205c3d2c6273622b727175547a26240710159f4256bcd6246156ce73b10ee3f
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55b84df2ef27071e1bcf2b8efc3d6b7785ff7a24f59d685fade7a53af7a2593d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ff383a89d52f883cafae387eb5146463fb8074271a1417f846316599e03e648
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.5937339859695004,
6
  "eval_steps": 500,
7
- "global_step": 33600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -4712,6 +4712,34 @@
4712
  "learning_rate": 2.258251359682708e-05,
4713
  "loss": 0.2924,
4714
  "step": 33600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4715
  }
4716
  ],
4717
  "logging_steps": 50,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.5972681168383666,
6
  "eval_steps": 500,
7
+ "global_step": 33800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
4712
  "learning_rate": 2.258251359682708e-05,
4713
  "loss": 0.2924,
4714
  "step": 33600
4715
+ },
4716
+ {
4717
+ "epoch": 0.594617518686717,
4718
+ "grad_norm": 2.0076584815979004,
4719
+ "learning_rate": 2.2533427578488545e-05,
4720
+ "loss": 0.2952,
4721
+ "step": 33650
4722
+ },
4723
+ {
4724
+ "epoch": 0.5955010514039335,
4725
+ "grad_norm": 1.203574299812317,
4726
+ "learning_rate": 2.2484341560150006e-05,
4727
+ "loss": 0.2754,
4728
+ "step": 33700
4729
+ },
4730
+ {
4731
+ "epoch": 0.59638458412115,
4732
+ "grad_norm": 2.815420150756836,
4733
+ "learning_rate": 2.243525554181147e-05,
4734
+ "loss": 0.3434,
4735
+ "step": 33750
4736
+ },
4737
+ {
4738
+ "epoch": 0.5972681168383666,
4739
+ "grad_norm": 1.487236499786377,
4740
+ "learning_rate": 2.2386169523472935e-05,
4741
+ "loss": 0.2541,
4742
+ "step": 33800
4743
  }
4744
  ],
4745
  "logging_steps": 50,
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fff70b6f1e4863ea80efbdd3030d9dd2b5fc82ab59547db2165691e8a1c93fc
3
  size 5560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25a21a534e4993b863994e64d84a120efcce8aac5f212cbacbdb8f1e5edfbb2e
3
  size 5560