guyhadad01 commited on
Commit
99497f0
·
verified ·
1 Parent(s): 1fb376b

Training in progress, step 51600, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -1187,144 +1187,13 @@ You can finetune this model on your own dataset.
1187
  </details>
1188
 
1189
  ### Training Logs
1190
- <details><summary>Click to expand</summary>
1191
-
1192
  | Epoch | Step | Training Loss |
1193
  |:------:|:-----:|:-------------:|
1194
- | 0.7925 | 44850 | 0.25 |
1195
- | 0.7934 | 44900 | 0.3381 |
1196
- | 0.7943 | 44950 | 0.2914 |
1197
- | 0.7952 | 45000 | 0.282 |
1198
- | 0.7961 | 45050 | 0.2596 |
1199
- | 0.7969 | 45100 | 0.311 |
1200
- | 0.7978 | 45150 | 0.2368 |
1201
- | 0.7987 | 45200 | 0.3147 |
1202
- | 0.7996 | 45250 | 0.2964 |
1203
- | 0.8005 | 45300 | 0.34 |
1204
- | 0.8014 | 45350 | 0.3249 |
1205
- | 0.8022 | 45400 | 0.4092 |
1206
- | 0.8031 | 45450 | 0.2601 |
1207
- | 0.8040 | 45500 | 0.306 |
1208
- | 0.8049 | 45550 | 0.2888 |
1209
- | 0.8058 | 45600 | 0.3101 |
1210
- | 0.8067 | 45650 | 0.3148 |
1211
- | 0.8075 | 45700 | 0.3975 |
1212
- | 0.8084 | 45750 | 0.391 |
1213
- | 0.8093 | 45800 | 0.3055 |
1214
- | 0.8102 | 45850 | 0.2434 |
1215
- | 0.8111 | 45900 | 0.285 |
1216
- | 0.8120 | 45950 | 0.3952 |
1217
- | 0.8129 | 46000 | 0.2802 |
1218
- | 0.8137 | 46050 | 0.2687 |
1219
- | 0.8146 | 46100 | 0.2787 |
1220
- | 0.8155 | 46150 | 0.2943 |
1221
- | 0.8164 | 46200 | 0.3386 |
1222
- | 0.8173 | 46250 | 0.3227 |
1223
- | 0.8182 | 46300 | 0.2582 |
1224
- | 0.8190 | 46350 | 0.285 |
1225
- | 0.8199 | 46400 | 0.2989 |
1226
- | 0.8208 | 46450 | 0.2761 |
1227
- | 0.8217 | 46500 | 0.299 |
1228
- | 0.8226 | 46550 | 0.2908 |
1229
- | 0.8235 | 46600 | 0.3134 |
1230
- | 0.8243 | 46650 | 0.2603 |
1231
- | 0.8252 | 46700 | 0.2965 |
1232
- | 0.8261 | 46750 | 0.2506 |
1233
- | 0.8270 | 46800 | 0.2258 |
1234
- | 0.8279 | 46850 | 0.2209 |
1235
- | 0.8288 | 46900 | 0.2756 |
1236
- | 0.8296 | 46950 | 0.3345 |
1237
- | 0.8305 | 47000 | 0.3016 |
1238
- | 0.8314 | 47050 | 0.2712 |
1239
- | 0.8323 | 47100 | 0.3721 |
1240
- | 0.8332 | 47150 | 0.3483 |
1241
- | 0.8341 | 47200 | 0.3002 |
1242
- | 0.8349 | 47250 | 0.2333 |
1243
- | 0.8358 | 47300 | 0.3043 |
1244
- | 0.8367 | 47350 | 0.2992 |
1245
- | 0.8376 | 47400 | 0.3367 |
1246
- | 0.8385 | 47450 | 0.3135 |
1247
- | 0.8394 | 47500 | 0.2681 |
1248
- | 0.8402 | 47550 | 0.2764 |
1249
- | 0.8411 | 47600 | 0.3211 |
1250
- | 0.8420 | 47650 | 0.3081 |
1251
- | 0.8429 | 47700 | 0.2929 |
1252
- | 0.8438 | 47750 | 0.3466 |
1253
- | 0.8447 | 47800 | 0.3012 |
1254
- | 0.8455 | 47850 | 0.2953 |
1255
- | 0.8464 | 47900 | 0.2914 |
1256
- | 0.8473 | 47950 | 0.3219 |
1257
- | 0.8482 | 48000 | 0.3104 |
1258
- | 0.8491 | 48050 | 0.3676 |
1259
- | 0.8500 | 48100 | 0.2989 |
1260
- | 0.8508 | 48150 | 0.3259 |
1261
- | 0.8517 | 48200 | 0.2949 |
1262
- | 0.8526 | 48250 | 0.3914 |
1263
- | 0.8535 | 48300 | 0.2645 |
1264
- | 0.8544 | 48350 | 0.3358 |
1265
- | 0.8553 | 48400 | 0.2939 |
1266
- | 0.8561 | 48450 | 0.4063 |
1267
- | 0.8570 | 48500 | 0.3492 |
1268
- | 0.8579 | 48550 | 0.2794 |
1269
- | 0.8588 | 48600 | 0.2854 |
1270
- | 0.8597 | 48650 | 0.2664 |
1271
- | 0.8606 | 48700 | 0.3028 |
1272
- | 0.8614 | 48750 | 0.2579 |
1273
- | 0.8623 | 48800 | 0.3404 |
1274
- | 0.8632 | 48850 | 0.3535 |
1275
- | 0.8641 | 48900 | 0.2224 |
1276
- | 0.8650 | 48950 | 0.2701 |
1277
- | 0.8659 | 49000 | 0.2506 |
1278
- | 0.8667 | 49050 | 0.3032 |
1279
- | 0.8676 | 49100 | 0.3881 |
1280
- | 0.8685 | 49150 | 0.253 |
1281
- | 0.8694 | 49200 | 0.2827 |
1282
- | 0.8703 | 49250 | 0.266 |
1283
- | 0.8712 | 49300 | 0.3008 |
1284
- | 0.8720 | 49350 | 0.3406 |
1285
- | 0.8729 | 49400 | 0.3348 |
1286
- | 0.8738 | 49450 | 0.3021 |
1287
- | 0.8747 | 49500 | 0.3182 |
1288
- | 0.8756 | 49550 | 0.3063 |
1289
- | 0.8765 | 49600 | 0.2385 |
1290
- | 0.8773 | 49650 | 0.3152 |
1291
- | 0.8782 | 49700 | 0.267 |
1292
- | 0.8791 | 49750 | 0.2599 |
1293
- | 0.8800 | 49800 | 0.3226 |
1294
- | 0.8809 | 49850 | 0.3516 |
1295
- | 0.8818 | 49900 | 0.2912 |
1296
- | 0.8826 | 49950 | 0.2853 |
1297
- | 0.8835 | 50000 | 0.3212 |
1298
- | 0.8844 | 50050 | 0.3399 |
1299
- | 0.8853 | 50100 | 0.3104 |
1300
- | 0.8862 | 50150 | 0.368 |
1301
- | 0.8871 | 50200 | 0.2848 |
1302
- | 0.8880 | 50250 | 0.4137 |
1303
- | 0.8888 | 50300 | 0.3597 |
1304
- | 0.8897 | 50350 | 0.3246 |
1305
- | 0.8906 | 50400 | 0.2658 |
1306
- | 0.8915 | 50450 | 0.2954 |
1307
- | 0.8924 | 50500 | 0.3035 |
1308
- | 0.8933 | 50550 | 0.2654 |
1309
- | 0.8941 | 50600 | 0.2742 |
1310
- | 0.8950 | 50650 | 0.3435 |
1311
- | 0.8959 | 50700 | 0.2947 |
1312
- | 0.8968 | 50750 | 0.3013 |
1313
- | 0.8977 | 50800 | 0.3845 |
1314
- | 0.8986 | 50850 | 0.2882 |
1315
- | 0.8994 | 50900 | 0.3639 |
1316
- | 0.9003 | 50950 | 0.2332 |
1317
- | 0.9012 | 51000 | 0.3363 |
1318
- | 0.9021 | 51050 | 0.2674 |
1319
- | 0.9030 | 51100 | 0.331 |
1320
- | 0.9039 | 51150 | 0.2881 |
1321
- | 0.9047 | 51200 | 0.3553 |
1322
- | 0.9056 | 51250 | 0.2936 |
1323
- | 0.9065 | 51300 | 0.2987 |
1324
- | 0.9074 | 51350 | 0.3343 |
1325
- | 0.9083 | 51400 | 0.2919 |
1326
 
1327
- </details>
1328
 
1329
  ### Framework Versions
1330
  - Python: 3.11.13
 
1187
  </details>
1188
 
1189
  ### Training Logs
 
 
1190
  | Epoch | Step | Training Loss |
1191
  |:------:|:-----:|:-------------:|
1192
+ | 0.9092 | 51450 | 0.2997 |
1193
+ | 0.9100 | 51500 | 0.2247 |
1194
+ | 0.9109 | 51550 | 0.2918 |
1195
+ | 0.9118 | 51600 | 0.3295 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1196
 
 
1197
 
1198
  ### Framework Versions
1199
  - Python: 3.11.13
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f6095595fb3007928d93e8e219ea90fc483d7933ff71480f93acc7032b0be18
3
  size 90864192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeecd8d9471d7a0b6627d01abbb6ab9354f23702be741b49535069a09a804f63
3
  size 90864192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5528739540379e7472ab0ae207cba54ed4fa2009128623e5cc01b704f91e4334
3
  size 180609210
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33a86535350dee0d838aabde4505a86ae7f966c06b02b476617bf119348d925b
3
  size 180609210
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00b1d2649da1c33c4c6171910ab9897e0cf291c76124110f095256b857bf610a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a589e7cc1c3f0f337824a3d6a8bc421970447f40e56c215be58c6b1d6c52a5a0
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ed7436953ae4e16b051c219b7cb3a2e2190d41cf4e564a35d17483bdb596e66
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39475f4e00274ea95478c0aa6d264be5db3bd87ad877f934d4b5cd1f7f759e82
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ac1d7bd261ca287c71b297338a8785cacbd5d9b3b50d5e9d2567d8805ed369c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0adae6525b0259ff389512f804b95631a5bbbd0e13a53824c06deb281fe18e7
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.9082716332985811,
6
  "eval_steps": 500,
7
- "global_step": 51400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -7204,6 +7204,34 @@
7204
  "learning_rate": 5.1167265516090395e-06,
7205
  "loss": 0.2919,
7206
  "step": 51400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7207
  }
7208
  ],
7209
  "logging_steps": 50,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.9118057641674471,
6
  "eval_steps": 500,
7
+ "global_step": 51600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
7204
  "learning_rate": 5.1167265516090395e-06,
7205
  "loss": 0.2919,
7206
  "step": 51400
7207
+ },
7208
+ {
7209
+ "epoch": 0.9091551660157976,
7210
+ "grad_norm": 1.3142715692520142,
7211
+ "learning_rate": 5.067640533270503e-06,
7212
+ "loss": 0.2997,
7213
+ "step": 51450
7214
+ },
7215
+ {
7216
+ "epoch": 0.9100386987330141,
7217
+ "grad_norm": 1.3387079238891602,
7218
+ "learning_rate": 5.018554514931967e-06,
7219
+ "loss": 0.2247,
7220
+ "step": 51500
7221
+ },
7222
+ {
7223
+ "epoch": 0.9109222314502307,
7224
+ "grad_norm": 1.9581636190414429,
7225
+ "learning_rate": 4.96946849659343e-06,
7226
+ "loss": 0.2918,
7227
+ "step": 51550
7228
+ },
7229
+ {
7230
+ "epoch": 0.9118057641674471,
7231
+ "grad_norm": 1.3822007179260254,
7232
+ "learning_rate": 4.920382478254894e-06,
7233
+ "loss": 0.3295,
7234
+ "step": 51600
7235
  }
7236
  ],
7237
  "logging_steps": 50,
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b97480b9506272973ef63d3e54eedf906636737574e44c7ac22ad4421393b708
3
  size 5560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b1eb037399f5df91069f9b474c7a40c3e0d7a309caabb274d665499c0377b52
3
  size 5560