Benyucong commited on
Commit
a7ddfa8
·
verified ·
1 Parent(s): 45b7cb6

Training in progress, step 1740, checkpoint

Browse files
.gitattributes CHANGED
@@ -70,3 +70,21 @@ checkpoint-26100/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs
70
  checkpoint-26100/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
71
  checkpoint-26100/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
72
  checkpoint-26100/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  checkpoint-26100/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
71
  checkpoint-26100/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
72
  checkpoint-26100/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
73
+ checkpoint-1740/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
74
+ checkpoint-1740/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
75
+ checkpoint-1740/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
76
+ checkpoint-1740/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
77
+ checkpoint-1740/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
78
+ checkpoint-1740/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
79
+ checkpoint-1740/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
80
+ checkpoint-1740/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
81
+ checkpoint-1740/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
82
+ checkpoint-1740/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text
83
+ checkpoint-1740/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
84
+ checkpoint-1740/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
85
+ checkpoint-1740/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
86
+ checkpoint-1740/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
87
+ checkpoint-1740/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
88
+ checkpoint-1740/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
89
+ checkpoint-1740/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
90
+ checkpoint-1740/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
checkpoint-1740/optimizer_0/.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fe3ece4f55d0bc775e503a8725cfb5d6385b56f3a3d78944ac2ca22c063c86c
3
+ size 1533607
checkpoint-1740/optimizer_0/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9697df6ed26aab67a5eb297b21f1ed7ef897ff3352585872164c1833a6cf5be
3
+ size 4023049125
checkpoint-1740/optimizer_0/__1_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe6bce1ff002bfb025d142b2dafcde10f258fc4ececfb650263d0e65036121e3
3
+ size 4023141220
checkpoint-1740/optimizer_0/__2_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b4b944c2bccdcb152926b2aa32daf4c98551f98aa389774089d01a62941ed38
3
+ size 4023141220
checkpoint-1740/optimizer_0/__3_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75cf116ec175058d8aa5da9c34f8d976f73efe37cb64022d33959f0fcce07a1b
3
+ size 4023141220
checkpoint-1740/optimizer_0/__4_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94c8aea2de1bfd0ee38e8f77a97ad31bd5cfa663a21ef8e7bf0d47f0780d1678
3
+ size 4023148068
checkpoint-1740/optimizer_0/__5_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5146dff10fee6dec78048558bd7f2c46e7e3ede2cf2c47d2fab9ec699462e295
3
+ size 4023151095
checkpoint-1740/optimizer_0/__6_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc507c5c2c91b170b709921cb9d249b09e527d1e3fd98368dc372a2ea98c684a
3
+ size 4023143426
checkpoint-1740/optimizer_0/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8f5f63924572e83a079c6725ed004504cb1c0af84664e8d7ff2fbd77da55f64
3
+ size 4023102466
checkpoint-1740/pytorch_model_fsdp_0/.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:763b720519bf8b24185356278ba85e651716ffffde383edfc3b9ee9046d37b5d
3
+ size 619642
checkpoint-1740/pytorch_model_fsdp_0/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ccc89d24418a0cd5432d89a499a6f60d657530044c35b90bc5bd6167570d411
3
+ size 2205665511
checkpoint-1740/pytorch_model_fsdp_0/__1_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e76a29707031238dc95b96b3ef14660214dbe71127141eede422509abbaf185a
3
+ size 2205665511
checkpoint-1740/pytorch_model_fsdp_0/__2_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4c4316f1e28ba83363472b49c2199e7bf6ecd4f88ca7c8b4d72b79b39c06ed2
3
+ size 2205665511
checkpoint-1740/pytorch_model_fsdp_0/__3_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ad4dc7d4ec67f0a29d80c8f38d041c2bcfa652444d848ca7621745cdd6756fb
3
+ size 2205665511
checkpoint-1740/pytorch_model_fsdp_0/__4_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45b57ca6abc992815301617c3389af82366ec594f45ab95ac5f9d082f7392a38
3
+ size 2205665511
checkpoint-1740/pytorch_model_fsdp_0/__5_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b4f65114edb8892a239fa613670794cbc122e36506505e9226b931ab634a08f
3
+ size 2205665511
checkpoint-1740/pytorch_model_fsdp_0/__6_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d2e3f9fc0e3e41b51df6f170692009a2f2d9419eca1ab630ca23ab9d7857332
3
+ size 2205665511
checkpoint-1740/pytorch_model_fsdp_0/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c69ad442d84dd6e9b3abede0d529279a6e11178db320adcf741498542936405
3
+ size 2205624551
checkpoint-1740/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8f92cf63e0989759370d24108b469c492c12202403f036015307ce49f12cedc
3
+ size 16389
checkpoint-1740/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ed40a0a4e9f365d2c6cc004d97e6705894eba46c8be4c160c1455bc3062dee1
3
+ size 16389
checkpoint-1740/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d688b304d19c260b5cfa471535ed51d7e1d60b3a0d0159dfd1a04b87904a9f42
3
+ size 16389
checkpoint-1740/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9967425ebcaee80d9b518fa0244d52f739b1b983d87cda71d5fede0c073e9d3b
3
+ size 16389
checkpoint-1740/rng_state_4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:469900fd39c667ffbd49c3c407c0ba317a1e9f5f9339a99b5d38423b7d0ce6d4
3
+ size 16389
checkpoint-1740/rng_state_5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:124688471ff2a6e80f2fcefedbf741fb18d08dd539d5bd07a52e81be545142a5
3
+ size 16389
checkpoint-1740/rng_state_6.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e69f1ced9f992a72c948698e5eb06088610788988cdb2fdbdd624e064319d60
3
+ size 16389
checkpoint-1740/rng_state_7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a376268a55d6ee10c371c06aa952334c4c6a1af9ea2d71b1951a57367a0c6722
3
+ size 16389
checkpoint-1740/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0062148f33d4f3fe23b9c81d88aa689f2913dd5593389ad79c20174eb8711547
3
+ size 1465
checkpoint-1740/trainer_state.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1740,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.28735632183908044,
14
+ "grad_norm": 0.23086652159690857,
15
+ "learning_rate": 1.4264367816091955e-05,
16
+ "loss": 0.2356,
17
+ "mean_token_accuracy": 0.9099636316299439,
18
+ "step": 500
19
+ },
20
+ {
21
+ "epoch": 0.5747126436781609,
22
+ "grad_norm": 0.4159923195838928,
23
+ "learning_rate": 8.517241379310345e-06,
24
+ "loss": 0.2265,
25
+ "mean_token_accuracy": 0.9128129783868789,
26
+ "step": 1000
27
+ },
28
+ {
29
+ "epoch": 0.8620689655172413,
30
+ "grad_norm": 0.3880501389503479,
31
+ "learning_rate": 2.770114942528736e-06,
32
+ "loss": 0.2071,
33
+ "mean_token_accuracy": 0.9202006314992904,
34
+ "step": 1500
35
+ }
36
+ ],
37
+ "logging_steps": 500,
38
+ "max_steps": 1740,
39
+ "num_input_tokens_seen": 0,
40
+ "num_train_epochs": 1,
41
+ "save_steps": 24000,
42
+ "stateful_callbacks": {
43
+ "TrainerControl": {
44
+ "args": {
45
+ "should_epoch_stop": false,
46
+ "should_evaluate": false,
47
+ "should_log": false,
48
+ "should_save": true,
49
+ "should_training_stop": true
50
+ },
51
+ "attributes": {}
52
+ }
53
+ },
54
+ "total_flos": 6.358452657979392e+16,
55
+ "train_batch_size": 1,
56
+ "trial_name": null,
57
+ "trial_params": null
58
+ }