Training in progress, step 1740, checkpoint
Browse files- .gitattributes +18 -0
- checkpoint-1740/optimizer_0/.metadata +3 -0
- checkpoint-1740/optimizer_0/__0_0.distcp +3 -0
- checkpoint-1740/optimizer_0/__1_0.distcp +3 -0
- checkpoint-1740/optimizer_0/__2_0.distcp +3 -0
- checkpoint-1740/optimizer_0/__3_0.distcp +3 -0
- checkpoint-1740/optimizer_0/__4_0.distcp +3 -0
- checkpoint-1740/optimizer_0/__5_0.distcp +3 -0
- checkpoint-1740/optimizer_0/__6_0.distcp +3 -0
- checkpoint-1740/optimizer_0/__7_0.distcp +3 -0
- checkpoint-1740/pytorch_model_fsdp_0/.metadata +3 -0
- checkpoint-1740/pytorch_model_fsdp_0/__0_0.distcp +3 -0
- checkpoint-1740/pytorch_model_fsdp_0/__1_0.distcp +3 -0
- checkpoint-1740/pytorch_model_fsdp_0/__2_0.distcp +3 -0
- checkpoint-1740/pytorch_model_fsdp_0/__3_0.distcp +3 -0
- checkpoint-1740/pytorch_model_fsdp_0/__4_0.distcp +3 -0
- checkpoint-1740/pytorch_model_fsdp_0/__5_0.distcp +3 -0
- checkpoint-1740/pytorch_model_fsdp_0/__6_0.distcp +3 -0
- checkpoint-1740/pytorch_model_fsdp_0/__7_0.distcp +3 -0
- checkpoint-1740/rng_state_0.pth +3 -0
- checkpoint-1740/rng_state_1.pth +3 -0
- checkpoint-1740/rng_state_2.pth +3 -0
- checkpoint-1740/rng_state_3.pth +3 -0
- checkpoint-1740/rng_state_4.pth +3 -0
- checkpoint-1740/rng_state_5.pth +3 -0
- checkpoint-1740/rng_state_6.pth +3 -0
- checkpoint-1740/rng_state_7.pth +3 -0
- checkpoint-1740/scheduler.pt +3 -0
- checkpoint-1740/trainer_state.json +58 -0
.gitattributes
CHANGED
@@ -70,3 +70,21 @@ checkpoint-26100/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs
|
|
70 |
checkpoint-26100/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
71 |
checkpoint-26100/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
72 |
checkpoint-26100/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
checkpoint-26100/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
71 |
checkpoint-26100/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
72 |
checkpoint-26100/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
73 |
+
checkpoint-1740/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
|
74 |
+
checkpoint-1740/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
75 |
+
checkpoint-1740/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
76 |
+
checkpoint-1740/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
77 |
+
checkpoint-1740/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
78 |
+
checkpoint-1740/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
79 |
+
checkpoint-1740/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
80 |
+
checkpoint-1740/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
81 |
+
checkpoint-1740/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
82 |
+
checkpoint-1740/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text
|
83 |
+
checkpoint-1740/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
84 |
+
checkpoint-1740/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
85 |
+
checkpoint-1740/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
86 |
+
checkpoint-1740/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
87 |
+
checkpoint-1740/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
88 |
+
checkpoint-1740/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
89 |
+
checkpoint-1740/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
90 |
+
checkpoint-1740/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
checkpoint-1740/optimizer_0/.metadata
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0fe3ece4f55d0bc775e503a8725cfb5d6385b56f3a3d78944ac2ca22c063c86c
|
3 |
+
size 1533607
|
checkpoint-1740/optimizer_0/__0_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c9697df6ed26aab67a5eb297b21f1ed7ef897ff3352585872164c1833a6cf5be
|
3 |
+
size 4023049125
|
checkpoint-1740/optimizer_0/__1_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fe6bce1ff002bfb025d142b2dafcde10f258fc4ececfb650263d0e65036121e3
|
3 |
+
size 4023141220
|
checkpoint-1740/optimizer_0/__2_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b4b944c2bccdcb152926b2aa32daf4c98551f98aa389774089d01a62941ed38
|
3 |
+
size 4023141220
|
checkpoint-1740/optimizer_0/__3_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75cf116ec175058d8aa5da9c34f8d976f73efe37cb64022d33959f0fcce07a1b
|
3 |
+
size 4023141220
|
checkpoint-1740/optimizer_0/__4_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:94c8aea2de1bfd0ee38e8f77a97ad31bd5cfa663a21ef8e7bf0d47f0780d1678
|
3 |
+
size 4023148068
|
checkpoint-1740/optimizer_0/__5_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5146dff10fee6dec78048558bd7f2c46e7e3ede2cf2c47d2fab9ec699462e295
|
3 |
+
size 4023151095
|
checkpoint-1740/optimizer_0/__6_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc507c5c2c91b170b709921cb9d249b09e527d1e3fd98368dc372a2ea98c684a
|
3 |
+
size 4023143426
|
checkpoint-1740/optimizer_0/__7_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8f5f63924572e83a079c6725ed004504cb1c0af84664e8d7ff2fbd77da55f64
|
3 |
+
size 4023102466
|
checkpoint-1740/pytorch_model_fsdp_0/.metadata
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:763b720519bf8b24185356278ba85e651716ffffde383edfc3b9ee9046d37b5d
|
3 |
+
size 619642
|
checkpoint-1740/pytorch_model_fsdp_0/__0_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ccc89d24418a0cd5432d89a499a6f60d657530044c35b90bc5bd6167570d411
|
3 |
+
size 2205665511
|
checkpoint-1740/pytorch_model_fsdp_0/__1_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e76a29707031238dc95b96b3ef14660214dbe71127141eede422509abbaf185a
|
3 |
+
size 2205665511
|
checkpoint-1740/pytorch_model_fsdp_0/__2_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e4c4316f1e28ba83363472b49c2199e7bf6ecd4f88ca7c8b4d72b79b39c06ed2
|
3 |
+
size 2205665511
|
checkpoint-1740/pytorch_model_fsdp_0/__3_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ad4dc7d4ec67f0a29d80c8f38d041c2bcfa652444d848ca7621745cdd6756fb
|
3 |
+
size 2205665511
|
checkpoint-1740/pytorch_model_fsdp_0/__4_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45b57ca6abc992815301617c3389af82366ec594f45ab95ac5f9d082f7392a38
|
3 |
+
size 2205665511
|
checkpoint-1740/pytorch_model_fsdp_0/__5_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8b4f65114edb8892a239fa613670794cbc122e36506505e9226b931ab634a08f
|
3 |
+
size 2205665511
|
checkpoint-1740/pytorch_model_fsdp_0/__6_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d2e3f9fc0e3e41b51df6f170692009a2f2d9419eca1ab630ca23ab9d7857332
|
3 |
+
size 2205665511
|
checkpoint-1740/pytorch_model_fsdp_0/__7_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c69ad442d84dd6e9b3abede0d529279a6e11178db320adcf741498542936405
|
3 |
+
size 2205624551
|
checkpoint-1740/rng_state_0.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f8f92cf63e0989759370d24108b469c492c12202403f036015307ce49f12cedc
|
3 |
+
size 16389
|
checkpoint-1740/rng_state_1.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ed40a0a4e9f365d2c6cc004d97e6705894eba46c8be4c160c1455bc3062dee1
|
3 |
+
size 16389
|
checkpoint-1740/rng_state_2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d688b304d19c260b5cfa471535ed51d7e1d60b3a0d0159dfd1a04b87904a9f42
|
3 |
+
size 16389
|
checkpoint-1740/rng_state_3.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9967425ebcaee80d9b518fa0244d52f739b1b983d87cda71d5fede0c073e9d3b
|
3 |
+
size 16389
|
checkpoint-1740/rng_state_4.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:469900fd39c667ffbd49c3c407c0ba317a1e9f5f9339a99b5d38423b7d0ce6d4
|
3 |
+
size 16389
|
checkpoint-1740/rng_state_5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:124688471ff2a6e80f2fcefedbf741fb18d08dd539d5bd07a52e81be545142a5
|
3 |
+
size 16389
|
checkpoint-1740/rng_state_6.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1e69f1ced9f992a72c948698e5eb06088610788988cdb2fdbdd624e064319d60
|
3 |
+
size 16389
|
checkpoint-1740/rng_state_7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a376268a55d6ee10c371c06aa952334c4c6a1af9ea2d71b1951a57367a0c6722
|
3 |
+
size 16389
|
checkpoint-1740/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0062148f33d4f3fe23b9c81d88aa689f2913dd5593389ad79c20174eb8711547
|
3 |
+
size 1465
|
checkpoint-1740/trainer_state.json
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_global_step": null,
|
3 |
+
"best_metric": null,
|
4 |
+
"best_model_checkpoint": null,
|
5 |
+
"epoch": 1.0,
|
6 |
+
"eval_steps": 500,
|
7 |
+
"global_step": 1740,
|
8 |
+
"is_hyper_param_search": false,
|
9 |
+
"is_local_process_zero": true,
|
10 |
+
"is_world_process_zero": true,
|
11 |
+
"log_history": [
|
12 |
+
{
|
13 |
+
"epoch": 0.28735632183908044,
|
14 |
+
"grad_norm": 0.23086652159690857,
|
15 |
+
"learning_rate": 1.4264367816091955e-05,
|
16 |
+
"loss": 0.2356,
|
17 |
+
"mean_token_accuracy": 0.9099636316299439,
|
18 |
+
"step": 500
|
19 |
+
},
|
20 |
+
{
|
21 |
+
"epoch": 0.5747126436781609,
|
22 |
+
"grad_norm": 0.4159923195838928,
|
23 |
+
"learning_rate": 8.517241379310345e-06,
|
24 |
+
"loss": 0.2265,
|
25 |
+
"mean_token_accuracy": 0.9128129783868789,
|
26 |
+
"step": 1000
|
27 |
+
},
|
28 |
+
{
|
29 |
+
"epoch": 0.8620689655172413,
|
30 |
+
"grad_norm": 0.3880501389503479,
|
31 |
+
"learning_rate": 2.770114942528736e-06,
|
32 |
+
"loss": 0.2071,
|
33 |
+
"mean_token_accuracy": 0.9202006314992904,
|
34 |
+
"step": 1500
|
35 |
+
}
|
36 |
+
],
|
37 |
+
"logging_steps": 500,
|
38 |
+
"max_steps": 1740,
|
39 |
+
"num_input_tokens_seen": 0,
|
40 |
+
"num_train_epochs": 1,
|
41 |
+
"save_steps": 24000,
|
42 |
+
"stateful_callbacks": {
|
43 |
+
"TrainerControl": {
|
44 |
+
"args": {
|
45 |
+
"should_epoch_stop": false,
|
46 |
+
"should_evaluate": false,
|
47 |
+
"should_log": false,
|
48 |
+
"should_save": true,
|
49 |
+
"should_training_stop": true
|
50 |
+
},
|
51 |
+
"attributes": {}
|
52 |
+
}
|
53 |
+
},
|
54 |
+
"total_flos": 6.358452657979392e+16,
|
55 |
+
"train_batch_size": 1,
|
56 |
+
"trial_name": null,
|
57 |
+
"trial_params": null
|
58 |
+
}
|