Training in progress, step 40, checkpoint

Browse files

Files changed (8) hide show

checkpoint-40/README.md +1 -0
checkpoint-40/adapter_config.json +8 -8
checkpoint-40/adapter_model.safetensors +1 -1
checkpoint-40/optimizer.pt +1 -1
checkpoint-40/rng_state.pth +3 -0
checkpoint-40/scheduler.pt +1 -1
checkpoint-40/trainer_state.json +63 -55
checkpoint-40/training_args.bin +2 -2

checkpoint-40/README.md CHANGED Viewed

@@ -206,4 +206,5 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
 [More Information Needed]
 ### Framework versions
 - PEFT 0.17.0

 [More Information Needed]
 ### Framework versions
+- PEFT 0.17.1
 - PEFT 0.17.0

checkpoint-40/adapter_config.json CHANGED Viewed

@@ -28,16 +28,16 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "gate_proj",
-    "down_proj",
-    "k_proj",
-    "q_proj",
-    "up_proj",
     "out_proj",
-    "fc2",
     "o_proj",
-    "fc1",
-    "v_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "fc1",
     "out_proj",
     "o_proj",
+    "up_proj",
+    "fc2",
+    "down_proj",
+    "q_proj",
+    "v_proj",
+    "gate_proj",
+    "k_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

checkpoint-40/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4cbcfe1f41e66b10185c7c23e72dbe0c64ccb805a63800a0ae1caf362db1bd42
 size 6127553104

 version https://git-lfs.github.com/spec/v1
+oid sha256:33bade23015f4045ee0a8d0d679bae5c4dc74148e0f11343dcf69b439959847a
 size 6127553104

checkpoint-40/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:12fd9a9b120124f8376c3388385cee42e91deeaa62c9d61bbf14eaac21b50f6b
 size 12255795061

 version https://git-lfs.github.com/spec/v1
+oid sha256:0b2b61f4edfd72f8844a70dcb0c4d170178d52e5859ec5a391d8902a1f52baf3
 size 12255795061

checkpoint-40/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c4052a06a1770ce7fb13f802f937f2d5bd93bc4c8f5245843df3f044b089a959
+size 14645

checkpoint-40/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6b812d5b7786acb1b99263d36c30ce0644124ca952ff59de991632fe5b919d86
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:336f88a90f504e7bb0e74b841e7fdbee0cdae6a693fa4e196b353dcdb2b44886
 size 1465

checkpoint-40/trainer_state.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.33264033264033266,
   "eval_steps": 10,
   "global_step": 40,
   "is_hyper_param_search": false,
@@ -10,84 +10,92 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.08316008316008316,
-      "grad_norm": 11.767539024353027,
-      "learning_rate": 0.00019145299145299148,
-      "loss": 18.0054,
-      "mean_token_accuracy": 0.8393091425299645,
-      "num_tokens": 323168.0,
       "step": 10
     },
     {
-      "epoch": 0.08316008316008316,
-      "eval_loss": 0.14767414331436157,
-      "eval_mean_token_accuracy": 0.9865884414085975,
-      "eval_num_tokens": 323168.0,
-      "eval_runtime": 32.0686,
-      "eval_samples_per_second": 6.237,
-      "eval_steps_per_second": 0.405,
       "step": 10
     },
     {
-      "epoch": 0.16632016632016633,
-      "grad_norm": 4.4300336837768555,
-      "learning_rate": 0.00017435897435897436,
-      "loss": 1.6772,
-      "mean_token_accuracy": 0.9893433898687363,
-      "num_tokens": 646431.0,
       "step": 20
     },
     {
-      "epoch": 0.16632016632016633,
-      "eval_loss": 0.057891350239515305,
-      "eval_mean_token_accuracy": 0.993429972575261,
-      "eval_num_tokens": 646431.0,
-      "eval_runtime": 32.7893,
-      "eval_samples_per_second": 6.1,
-      "eval_steps_per_second": 0.396,
       "step": 20
     },
     {
-      "epoch": 0.2494802494802495,
-      "grad_norm": 2.2051281929016113,
-      "learning_rate": 0.00015726495726495727,
-      "loss": 0.406,
-      "mean_token_accuracy": 0.9940585166215896,
-      "num_tokens": 969623.0,
       "step": 30
     },
     {
-      "epoch": 0.2494802494802495,
-      "eval_loss": 0.01031240914016962,
-      "eval_mean_token_accuracy": 0.9945538227374737,
-      "eval_num_tokens": 969623.0,
-      "eval_runtime": 32.2752,
-      "eval_samples_per_second": 6.197,
-      "eval_steps_per_second": 0.403,
       "step": 30
     },
     {
-      "epoch": 0.33264033264033266,
-      "grad_norm": 0.7954460382461548,
-      "learning_rate": 0.00014017094017094016,
-      "loss": 0.156,
-      "mean_token_accuracy": 0.9945382237434387,
-      "num_tokens": 1292839.0,
       "step": 40
     },
     {
-      "epoch": 0.33264033264033266,
-      "eval_loss": 0.009185228496789932,
-      "eval_mean_token_accuracy": 0.9949805828241202,
-      "eval_num_tokens": 1292839.0,
-      "eval_runtime": 31.808,
-      "eval_samples_per_second": 6.288,
-      "eval_steps_per_second": 0.409,
       "step": 40
     }
   ],
   "logging_steps": 10,
-  "max_steps": 121,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 40,
@@ -103,7 +111,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.146055700223099e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.02767208578346593,
   "eval_steps": 10,
   "global_step": 40,
   "is_hyper_param_search": false,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "entropy": 3.2428319215774537,
+      "epoch": 0.0069180214458664825,
+      "grad_norm": 45.59263229370117,
+      "learning_rate": 4.0909090909090915e-05,
+      "loss": 8.7229,
+      "mean_token_accuracy": 0.20135476849973202,
+      "num_tokens": 44798.0,
       "step": 10
     },
     {
+      "epoch": 0.0069180214458664825,
+      "eval_entropy": 3.4517827892303465,
+      "eval_loss": 7.022937774658203,
+      "eval_mean_token_accuracy": 0.29038591831922533,
+      "eval_num_tokens": 44798.0,
+      "eval_runtime": 42.5158,
+      "eval_samples_per_second": 4.704,
+      "eval_steps_per_second": 1.176,
       "step": 10
     },
     {
+      "entropy": 4.103338432312012,
+      "epoch": 0.013836042891732965,
+      "grad_norm": 15.53995418548584,
+      "learning_rate": 8.636363636363637e-05,
+      "loss": 5.0491,
+      "mean_token_accuracy": 0.44307171255350114,
+      "num_tokens": 89551.0,
       "step": 20
     },
     {
+      "epoch": 0.013836042891732965,
+      "eval_entropy": 4.955911350250244,
+      "eval_loss": 2.6293535232543945,
+      "eval_mean_token_accuracy": 0.6638811433315277,
+      "eval_num_tokens": 89551.0,
+      "eval_runtime": 42.5341,
+      "eval_samples_per_second": 4.702,
+      "eval_steps_per_second": 1.176,
       "step": 20
     },
     {
+      "entropy": 4.794859045743943,
+      "epoch": 0.020754064337599448,
+      "grad_norm": 8.58745002746582,
+      "learning_rate": 0.0001318181818181818,
+      "loss": 1.7872,
+      "mean_token_accuracy": 0.7792469739913941,
+      "num_tokens": 134427.0,
       "step": 30
     },
     {
+      "epoch": 0.020754064337599448,
+      "eval_entropy": 4.026243486404419,
+      "eval_loss": 1.0728627443313599,
+      "eval_mean_token_accuracy": 0.8464114594459534,
+      "eval_num_tokens": 134427.0,
+      "eval_runtime": 42.5437,
+      "eval_samples_per_second": 4.701,
+      "eval_steps_per_second": 1.175,
       "step": 30
     },
     {
+      "entropy": 2.9154508650302886,
+      "epoch": 0.02767208578346593,
+      "grad_norm": 5.161023139953613,
+      "learning_rate": 0.00017727272727272728,
+      "loss": 0.7894,
+      "mean_token_accuracy": 0.881743885576725,
+      "num_tokens": 179334.0,
       "step": 40
     },
     {
+      "epoch": 0.02767208578346593,
+      "eval_entropy": 1.3028265857696533,
+      "eval_loss": 0.27193209528923035,
+      "eval_mean_token_accuracy": 0.8934199070930481,
+      "eval_num_tokens": 179334.0,
+      "eval_runtime": 42.5195,
+      "eval_samples_per_second": 4.704,
+      "eval_steps_per_second": 1.176,
       "step": 40
     }
   ],
   "logging_steps": 10,
+  "max_steps": 1446,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 40,
       "attributes": {}
     }
   },
+  "total_flos": 3.0410461093170816e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

checkpoint-40/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fd81184c4386bdd5320f1754d4cda79540e3bb45d4e9eeffadfdb4c17e09fef2
-size 6353

 version https://git-lfs.github.com/spec/v1
+oid sha256:c665ff9710ba066622bdc47a0845adeeeb156957d33148906e62f67561245a3f
+size 6481