wangyichen25 committed on
Commit
a4c3471
·
verified ·
1 Parent(s): ac30cdf

Training in progress, step 40, checkpoint

Browse files
checkpoint-40/adapter_config.json CHANGED
@@ -28,16 +28,16 @@
28
  "rank_pattern": {},
29
  "revision": null,
30
  "target_modules": [
31
- "k_proj",
32
  "up_proj",
33
  "fc2",
34
- "o_proj",
35
  "down_proj",
36
- "out_proj",
37
- "q_proj",
38
  "fc1",
39
- "gate_proj",
40
- "v_proj"
 
 
41
  ],
42
  "target_parameters": null,
43
  "task_type": "CAUSAL_LM",
 
28
  "rank_pattern": {},
29
  "revision": null,
30
  "target_modules": [
 
31
  "up_proj",
32
  "fc2",
33
+ "gate_proj",
34
  "down_proj",
35
+ "v_proj",
 
36
  "fc1",
37
+ "o_proj",
38
+ "k_proj",
39
+ "out_proj",
40
+ "q_proj"
41
  ],
42
  "target_parameters": null,
43
  "task_type": "CAUSAL_LM",
checkpoint-40/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61c9e776efe85ee07a4c6341cf098d958bb2a5a5932fb07ac818ca53044e7df1
3
  size 6127553104
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c71211f5f0e2a913ae611ab507210ee79545f372ac0d4cd80471a997fd5d4c0
3
  size 6127553104
checkpoint-40/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b55031dcf5cdaf00a1418a750bbf3adfab581e79e572f775fe2d475f55aafb5
3
  size 12255795061
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0088e7dddbe8085dd036f4e509ba61586143429bb95021117b4ec8a6c6e56a9a
3
  size 12255795061
checkpoint-40/trainer_state.json CHANGED
@@ -10,87 +10,87 @@
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "entropy": 3.240912365913391,
14
  "epoch": 0.0069180214458664825,
15
- "grad_norm": 48.360877990722656,
16
  "learning_rate": 4.0909090909090915e-05,
17
- "loss": 8.7196,
18
- "mean_token_accuracy": 0.2005771730095148,
19
  "num_tokens": 44798.0,
20
  "step": 10
21
  },
22
  {
23
  "epoch": 0.0069180214458664825,
24
- "eval_entropy": 3.4391425704956053,
25
- "eval_loss": 7.116836071014404,
26
- "eval_mean_token_accuracy": 0.280334330201149,
27
  "eval_num_tokens": 44798.0,
28
- "eval_runtime": 41.2852,
29
- "eval_samples_per_second": 4.844,
30
- "eval_steps_per_second": 1.211,
31
  "step": 10
32
  },
33
  {
34
- "entropy": 4.105616652965546,
35
  "epoch": 0.013836042891732965,
36
- "grad_norm": 23.306013107299805,
37
  "learning_rate": 8.636363636363637e-05,
38
- "loss": 5.052,
39
- "mean_token_accuracy": 0.4401222452521324,
40
  "num_tokens": 89551.0,
41
  "step": 20
42
  },
43
  {
44
  "epoch": 0.013836042891732965,
45
- "eval_entropy": 4.973556356430054,
46
- "eval_loss": 2.668402671813965,
47
- "eval_mean_token_accuracy": 0.648028552532196,
48
  "eval_num_tokens": 89551.0,
49
- "eval_runtime": 41.2844,
50
- "eval_samples_per_second": 4.844,
51
- "eval_steps_per_second": 1.211,
52
  "step": 20
53
  },
54
  {
55
- "entropy": 4.935999858379364,
56
  "epoch": 0.020754064337599448,
57
- "grad_norm": 7.5827813148498535,
58
  "learning_rate": 0.0001318181818181818,
59
- "loss": 1.8098,
60
- "mean_token_accuracy": 0.7725904256105423,
61
  "num_tokens": 134427.0,
62
  "step": 30
63
  },
64
  {
65
  "epoch": 0.020754064337599448,
66
- "eval_entropy": 4.271238183975219,
67
- "eval_loss": 1.040016531944275,
68
- "eval_mean_token_accuracy": 0.850863606929779,
69
  "eval_num_tokens": 134427.0,
70
- "eval_runtime": 41.4413,
71
- "eval_samples_per_second": 4.826,
72
- "eval_steps_per_second": 1.207,
73
  "step": 30
74
  },
75
  {
76
- "entropy": 3.026445063948631,
77
  "epoch": 0.02767208578346593,
78
- "grad_norm": 5.579833984375,
79
  "learning_rate": 0.00017727272727272728,
80
- "loss": 0.7995,
81
- "mean_token_accuracy": 0.88612859249115,
82
  "num_tokens": 179334.0,
83
  "step": 40
84
  },
85
  {
86
  "epoch": 0.02767208578346593,
87
- "eval_entropy": 1.5599463820457458,
88
- "eval_loss": 0.4294031858444214,
89
- "eval_mean_token_accuracy": 0.8945973372459411,
90
  "eval_num_tokens": 179334.0,
91
- "eval_runtime": 41.2068,
92
- "eval_samples_per_second": 4.854,
93
- "eval_steps_per_second": 1.213,
94
  "step": 40
95
  }
96
  ],
 
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "entropy": 3.238903135061264,
14
  "epoch": 0.0069180214458664825,
15
+ "grad_norm": 44.88839340209961,
16
  "learning_rate": 4.0909090909090915e-05,
17
+ "loss": 8.7207,
18
+ "mean_token_accuracy": 0.20161552485078574,
19
  "num_tokens": 44798.0,
20
  "step": 10
21
  },
22
  {
23
  "epoch": 0.0069180214458664825,
24
+ "eval_entropy": 3.485778374671936,
25
+ "eval_loss": 7.185971260070801,
26
+ "eval_mean_token_accuracy": 0.2777047342061996,
27
  "eval_num_tokens": 44798.0,
28
+ "eval_runtime": 41.459,
29
+ "eval_samples_per_second": 4.824,
30
+ "eval_steps_per_second": 1.206,
31
  "step": 10
32
  },
33
  {
34
+ "entropy": 4.110421192646027,
35
  "epoch": 0.013836042891732965,
36
+ "grad_norm": 26.09234619140625,
37
  "learning_rate": 8.636363636363637e-05,
38
+ "loss": 5.0369,
39
+ "mean_token_accuracy": 0.4463097870349884,
40
  "num_tokens": 89551.0,
41
  "step": 20
42
  },
43
  {
44
  "epoch": 0.013836042891732965,
45
+ "eval_entropy": 5.049612331390381,
46
+ "eval_loss": 2.6804354190826416,
47
+ "eval_mean_token_accuracy": 0.6554659616947174,
48
  "eval_num_tokens": 89551.0,
49
+ "eval_runtime": 41.3525,
50
+ "eval_samples_per_second": 4.836,
51
+ "eval_steps_per_second": 1.209,
52
  "step": 20
53
  },
54
  {
55
+ "entropy": 4.915739822387695,
56
  "epoch": 0.020754064337599448,
57
+ "grad_norm": 6.044945240020752,
58
  "learning_rate": 0.0001318181818181818,
59
+ "loss": 1.7923,
60
+ "mean_token_accuracy": 0.7768757700920105,
61
  "num_tokens": 134427.0,
62
  "step": 30
63
  },
64
  {
65
  "epoch": 0.020754064337599448,
66
+ "eval_entropy": 4.307503514289856,
67
+ "eval_loss": 1.08396577835083,
68
+ "eval_mean_token_accuracy": 0.8470160067081451,
69
  "eval_num_tokens": 134427.0,
70
+ "eval_runtime": 41.6811,
71
+ "eval_samples_per_second": 4.798,
72
+ "eval_steps_per_second": 1.2,
73
  "step": 30
74
  },
75
  {
76
+ "entropy": 2.974119684100151,
77
  "epoch": 0.02767208578346593,
78
+ "grad_norm": 4.946300506591797,
79
  "learning_rate": 0.00017727272727272728,
80
+ "loss": 0.7439,
81
+ "mean_token_accuracy": 0.8815336391329766,
82
  "num_tokens": 179334.0,
83
  "step": 40
84
  },
85
  {
86
  "epoch": 0.02767208578346593,
87
+ "eval_entropy": 1.1884030628204345,
88
+ "eval_loss": 0.30163103342056274,
89
+ "eval_mean_token_accuracy": 0.8916239559650421,
90
  "eval_num_tokens": 179334.0,
91
+ "eval_runtime": 41.5034,
92
+ "eval_samples_per_second": 4.819,
93
+ "eval_steps_per_second": 1.205,
94
  "step": 40
95
  }
96
  ],
checkpoint-40/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1bebc67618e014c760eda21fce679ce0675e3a51da68b3f33bf511148f795f1
3
  size 6481
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7d1da37edb56f19d5d0e4a00c4a139121e78d4c28df66d5a6172229619a3e96
3
  size 6481