guyhadad01 committed on
Commit
e0e3325
·
verified ·
1 Parent(s): cc2cdf3

Training in progress, step 50000, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -1187,6 +1187,8 @@ You can finetune this model on your own dataset.
1187
  </details>
1188
 
1189
  ### Training Logs
 
 
1190
  | Epoch | Step | Training Loss |
1191
  |:------:|:-----:|:-------------:|
1192
  | 0.7925 | 44850 | 0.25 |
@@ -1289,7 +1291,12 @@ You can finetune this model on your own dataset.
1289
  | 0.8782 | 49700 | 0.267 |
1290
  | 0.8791 | 49750 | 0.2599 |
1291
  | 0.8800 | 49800 | 0.3226 |
 
 
 
 
1292
 
 
1293
 
1294
  ### Framework Versions
1295
  - Python: 3.11.13
 
1187
  </details>
1188
 
1189
  ### Training Logs
1190
+ <details><summary>Click to expand</summary>
1191
+
1192
  | Epoch | Step | Training Loss |
1193
  |:------:|:-----:|:-------------:|
1194
  | 0.7925 | 44850 | 0.25 |
 
1291
  | 0.8782 | 49700 | 0.267 |
1292
  | 0.8791 | 49750 | 0.2599 |
1293
  | 0.8800 | 49800 | 0.3226 |
1294
+ | 0.8809 | 49850 | 0.3516 |
1295
+ | 0.8818 | 49900 | 0.2912 |
1296
+ | 0.8826 | 49950 | 0.2853 |
1297
+ | 0.8835 | 50000 | 0.3212 |
1298
 
1299
+ </details>
1300
 
1301
  ### Framework Versions
1302
  - Python: 3.11.13
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed2d369eb5785f37e695387aeb0f0e1b88aba7649dea47063848b806090ff91f
3
  size 90864192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f629f9c4d9b3dfe15298ca42cf535d9537f9d84496ead633516c3f55df659809
3
  size 90864192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c88873a80431132be5b9a6198db1d2eb68145fe27f94342674f074b433cbd62
3
  size 180609210
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82a5e0de80f3319e9bcb41ff6fcaf27ef7f48f3c5947829a81867a4d0d22eced
3
  size 180609210
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0c031c47895ee65720a29aa486037b758096a0c008e8c7d3bf0a06b848dd598
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39be9337bc1edfc554e8e83920537fc1f1ac1fb64502256befd2c768cb7272f2
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98ce6fbeef0ab2e0cecf23435bfa5221dc56a6ce59fe8d9b84d1e42bcc5d69c4
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05900a01ed21b7d3fe65d7b48e29dc049394fbc9a57676739fac72875cfe1dad
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1cd200196e0f200dd4627f60d44a82669aaadc601edaa64b9c6e1ddba3c662c9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f94f189d019244fe1bc7202740eb1c5db8cec176873eb287a31349f77949d54f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.8799985863476525,
6
  "eval_steps": 500,
7
- "global_step": 49800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -6980,6 +6980,34 @@
6980
  "learning_rate": 6.686497418075435e-06,
6981
  "loss": 0.3226,
6982
  "step": 49800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6983
  }
6984
  ],
6985
  "logging_steps": 50,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.8835327172165185,
6
  "eval_steps": 500,
7
+ "global_step": 50000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
6980
  "learning_rate": 6.686497418075435e-06,
6981
  "loss": 0.3226,
6982
  "step": 49800
6983
+ },
6984
+ {
6985
+ "epoch": 0.880882119064869,
6986
+ "grad_norm": 1.385986089706421,
6987
+ "learning_rate": 6.6374113997369e-06,
6988
+ "loss": 0.3516,
6989
+ "step": 49850
6990
+ },
6991
+ {
6992
+ "epoch": 0.8817656517820854,
6993
+ "grad_norm": 1.4890649318695068,
6994
+ "learning_rate": 6.588325381398362e-06,
6995
+ "loss": 0.2912,
6996
+ "step": 49900
6997
+ },
6998
+ {
6999
+ "epoch": 0.882649184499302,
7000
+ "grad_norm": 2.459829807281494,
7001
+ "learning_rate": 6.5392393630598265e-06,
7002
+ "loss": 0.2853,
7003
+ "step": 49950
7004
+ },
7005
+ {
7006
+ "epoch": 0.8835327172165185,
7007
+ "grad_norm": 1.6274219751358032,
7008
+ "learning_rate": 6.4901533447212895e-06,
7009
+ "loss": 0.3212,
7010
+ "step": 50000
7011
  }
7012
  ],
7013
  "logging_steps": 50,