wangyichen25 commited on
Commit
ef9fa18
·
verified ·
1 Parent(s): b02afb7

Training in progress, step 40, checkpoint

Browse files
checkpoint-40/README.md CHANGED
@@ -206,4 +206,5 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
206
  [More Information Needed]
207
  ### Framework versions
208
 
 
209
  - PEFT 0.17.0
 
206
  [More Information Needed]
207
  ### Framework versions
208
 
209
+ - PEFT 0.17.1
210
  - PEFT 0.17.0
checkpoint-40/adapter_config.json CHANGED
@@ -28,16 +28,16 @@
28
  "rank_pattern": {},
29
  "revision": null,
30
  "target_modules": [
31
- "gate_proj",
32
- "down_proj",
33
- "k_proj",
34
- "q_proj",
35
- "up_proj",
36
  "out_proj",
37
- "fc2",
38
  "o_proj",
39
- "fc1",
40
- "v_proj"
 
 
 
 
 
41
  ],
42
  "target_parameters": null,
43
  "task_type": "CAUSAL_LM",
 
28
  "rank_pattern": {},
29
  "revision": null,
30
  "target_modules": [
31
+ "fc1",
 
 
 
 
32
  "out_proj",
 
33
  "o_proj",
34
+ "up_proj",
35
+ "fc2",
36
+ "down_proj",
37
+ "q_proj",
38
+ "v_proj",
39
+ "gate_proj",
40
+ "k_proj"
41
  ],
42
  "target_parameters": null,
43
  "task_type": "CAUSAL_LM",
checkpoint-40/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4cbcfe1f41e66b10185c7c23e72dbe0c64ccb805a63800a0ae1caf362db1bd42
3
  size 6127553104
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33bade23015f4045ee0a8d0d679bae5c4dc74148e0f11343dcf69b439959847a
3
  size 6127553104
checkpoint-40/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12fd9a9b120124f8376c3388385cee42e91deeaa62c9d61bbf14eaac21b50f6b
3
  size 12255795061
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b2b61f4edfd72f8844a70dcb0c4d170178d52e5859ec5a391d8902a1f52baf3
3
  size 12255795061
checkpoint-40/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4052a06a1770ce7fb13f802f937f2d5bd93bc4c8f5245843df3f044b089a959
3
+ size 14645
checkpoint-40/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b812d5b7786acb1b99263d36c30ce0644124ca952ff59de991632fe5b919d86
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:336f88a90f504e7bb0e74b841e7fdbee0cdae6a693fa4e196b353dcdb2b44886
3
  size 1465
checkpoint-40/trainer_state.json CHANGED
@@ -2,7 +2,7 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.33264033264033266,
6
  "eval_steps": 10,
7
  "global_step": 40,
8
  "is_hyper_param_search": false,
@@ -10,84 +10,92 @@
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "epoch": 0.08316008316008316,
14
- "grad_norm": 11.767539024353027,
15
- "learning_rate": 0.00019145299145299148,
16
- "loss": 18.0054,
17
- "mean_token_accuracy": 0.8393091425299645,
18
- "num_tokens": 323168.0,
 
19
  "step": 10
20
  },
21
  {
22
- "epoch": 0.08316008316008316,
23
- "eval_loss": 0.14767414331436157,
24
- "eval_mean_token_accuracy": 0.9865884414085975,
25
- "eval_num_tokens": 323168.0,
26
- "eval_runtime": 32.0686,
27
- "eval_samples_per_second": 6.237,
28
- "eval_steps_per_second": 0.405,
 
29
  "step": 10
30
  },
31
  {
32
- "epoch": 0.16632016632016633,
33
- "grad_norm": 4.4300336837768555,
34
- "learning_rate": 0.00017435897435897436,
35
- "loss": 1.6772,
36
- "mean_token_accuracy": 0.9893433898687363,
37
- "num_tokens": 646431.0,
 
38
  "step": 20
39
  },
40
  {
41
- "epoch": 0.16632016632016633,
42
- "eval_loss": 0.057891350239515305,
43
- "eval_mean_token_accuracy": 0.993429972575261,
44
- "eval_num_tokens": 646431.0,
45
- "eval_runtime": 32.7893,
46
- "eval_samples_per_second": 6.1,
47
- "eval_steps_per_second": 0.396,
 
48
  "step": 20
49
  },
50
  {
51
- "epoch": 0.2494802494802495,
52
- "grad_norm": 2.2051281929016113,
53
- "learning_rate": 0.00015726495726495727,
54
- "loss": 0.406,
55
- "mean_token_accuracy": 0.9940585166215896,
56
- "num_tokens": 969623.0,
 
57
  "step": 30
58
  },
59
  {
60
- "epoch": 0.2494802494802495,
61
- "eval_loss": 0.01031240914016962,
62
- "eval_mean_token_accuracy": 0.9945538227374737,
63
- "eval_num_tokens": 969623.0,
64
- "eval_runtime": 32.2752,
65
- "eval_samples_per_second": 6.197,
66
- "eval_steps_per_second": 0.403,
 
67
  "step": 30
68
  },
69
  {
70
- "epoch": 0.33264033264033266,
71
- "grad_norm": 0.7954460382461548,
72
- "learning_rate": 0.00014017094017094016,
73
- "loss": 0.156,
74
- "mean_token_accuracy": 0.9945382237434387,
75
- "num_tokens": 1292839.0,
 
76
  "step": 40
77
  },
78
  {
79
- "epoch": 0.33264033264033266,
80
- "eval_loss": 0.009185228496789932,
81
- "eval_mean_token_accuracy": 0.9949805828241202,
82
- "eval_num_tokens": 1292839.0,
83
- "eval_runtime": 31.808,
84
- "eval_samples_per_second": 6.288,
85
- "eval_steps_per_second": 0.409,
 
86
  "step": 40
87
  }
88
  ],
89
  "logging_steps": 10,
90
- "max_steps": 121,
91
  "num_input_tokens_seen": 0,
92
  "num_train_epochs": 1,
93
  "save_steps": 40,
@@ -103,7 +111,7 @@
103
  "attributes": {}
104
  }
105
  },
106
- "total_flos": 2.146055700223099e+17,
107
  "train_batch_size": 4,
108
  "trial_name": null,
109
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.02767208578346593,
6
  "eval_steps": 10,
7
  "global_step": 40,
8
  "is_hyper_param_search": false,
 
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "entropy": 3.2428319215774537,
14
+ "epoch": 0.0069180214458664825,
15
+ "grad_norm": 45.59263229370117,
16
+ "learning_rate": 4.0909090909090915e-05,
17
+ "loss": 8.7229,
18
+ "mean_token_accuracy": 0.20135476849973202,
19
+ "num_tokens": 44798.0,
20
  "step": 10
21
  },
22
  {
23
+ "epoch": 0.0069180214458664825,
24
+ "eval_entropy": 3.4517827892303465,
25
+ "eval_loss": 7.022937774658203,
26
+ "eval_mean_token_accuracy": 0.29038591831922533,
27
+ "eval_num_tokens": 44798.0,
28
+ "eval_runtime": 42.5158,
29
+ "eval_samples_per_second": 4.704,
30
+ "eval_steps_per_second": 1.176,
31
  "step": 10
32
  },
33
  {
34
+ "entropy": 4.103338432312012,
35
+ "epoch": 0.013836042891732965,
36
+ "grad_norm": 15.53995418548584,
37
+ "learning_rate": 8.636363636363637e-05,
38
+ "loss": 5.0491,
39
+ "mean_token_accuracy": 0.44307171255350114,
40
+ "num_tokens": 89551.0,
41
  "step": 20
42
  },
43
  {
44
+ "epoch": 0.013836042891732965,
45
+ "eval_entropy": 4.955911350250244,
46
+ "eval_loss": 2.6293535232543945,
47
+ "eval_mean_token_accuracy": 0.6638811433315277,
48
+ "eval_num_tokens": 89551.0,
49
+ "eval_runtime": 42.5341,
50
+ "eval_samples_per_second": 4.702,
51
+ "eval_steps_per_second": 1.176,
52
  "step": 20
53
  },
54
  {
55
+ "entropy": 4.794859045743943,
56
+ "epoch": 0.020754064337599448,
57
+ "grad_norm": 8.58745002746582,
58
+ "learning_rate": 0.0001318181818181818,
59
+ "loss": 1.7872,
60
+ "mean_token_accuracy": 0.7792469739913941,
61
+ "num_tokens": 134427.0,
62
  "step": 30
63
  },
64
  {
65
+ "epoch": 0.020754064337599448,
66
+ "eval_entropy": 4.026243486404419,
67
+ "eval_loss": 1.0728627443313599,
68
+ "eval_mean_token_accuracy": 0.8464114594459534,
69
+ "eval_num_tokens": 134427.0,
70
+ "eval_runtime": 42.5437,
71
+ "eval_samples_per_second": 4.701,
72
+ "eval_steps_per_second": 1.175,
73
  "step": 30
74
  },
75
  {
76
+ "entropy": 2.9154508650302886,
77
+ "epoch": 0.02767208578346593,
78
+ "grad_norm": 5.161023139953613,
79
+ "learning_rate": 0.00017727272727272728,
80
+ "loss": 0.7894,
81
+ "mean_token_accuracy": 0.881743885576725,
82
+ "num_tokens": 179334.0,
83
  "step": 40
84
  },
85
  {
86
+ "epoch": 0.02767208578346593,
87
+ "eval_entropy": 1.3028265857696533,
88
+ "eval_loss": 0.27193209528923035,
89
+ "eval_mean_token_accuracy": 0.8934199070930481,
90
+ "eval_num_tokens": 179334.0,
91
+ "eval_runtime": 42.5195,
92
+ "eval_samples_per_second": 4.704,
93
+ "eval_steps_per_second": 1.176,
94
  "step": 40
95
  }
96
  ],
97
  "logging_steps": 10,
98
+ "max_steps": 1446,
99
  "num_input_tokens_seen": 0,
100
  "num_train_epochs": 1,
101
  "save_steps": 40,
 
111
  "attributes": {}
112
  }
113
  },
114
+ "total_flos": 3.0410461093170816e+16,
115
  "train_batch_size": 4,
116
  "trial_name": null,
117
  "trial_params": null
checkpoint-40/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd81184c4386bdd5320f1754d4cda79540e3bb45d4e9eeffadfdb4c17e09fef2
3
- size 6353
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c665ff9710ba066622bdc47a0845adeeeb156957d33148906e62f67561245a3f
3
+ size 6481