Training in progress, step 40, checkpoint

Browse files

Files changed (5) hide show

checkpoint-40/adapter_config.json +6 -6
checkpoint-40/adapter_model.safetensors +1 -1
checkpoint-40/optimizer.pt +1 -1
checkpoint-40/trainer_state.json +40 -40
checkpoint-40/training_args.bin +1 -1

checkpoint-40/adapter_config.json CHANGED Viewed

@@ -28,16 +28,16 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "k_proj",
     "up_proj",
     "fc2",
-    "o_proj",
     "down_proj",
-    "out_proj",
-    "q_proj",
     "fc1",
-    "gate_proj",
-    "v_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "up_proj",
     "fc2",
+    "gate_proj",
     "down_proj",
+    "v_proj",
     "fc1",
+    "o_proj",
+    "k_proj",
+    "out_proj",
+    "q_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

checkpoint-40/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:61c9e776efe85ee07a4c6341cf098d958bb2a5a5932fb07ac818ca53044e7df1
 size 6127553104

 version https://git-lfs.github.com/spec/v1
+oid sha256:6c71211f5f0e2a913ae611ab507210ee79545f372ac0d4cd80471a997fd5d4c0
 size 6127553104

checkpoint-40/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2b55031dcf5cdaf00a1418a750bbf3adfab581e79e572f775fe2d475f55aafb5
 size 12255795061

 version https://git-lfs.github.com/spec/v1
+oid sha256:0088e7dddbe8085dd036f4e509ba61586143429bb95021117b4ec8a6c6e56a9a
 size 12255795061

checkpoint-40/trainer_state.json CHANGED Viewed

@@ -10,87 +10,87 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "entropy": 3.240912365913391,
       "epoch": 0.0069180214458664825,
-      "grad_norm": 48.360877990722656,
       "learning_rate": 4.0909090909090915e-05,
-      "loss": 8.7196,
-      "mean_token_accuracy": 0.2005771730095148,
       "num_tokens": 44798.0,
       "step": 10
     },
     {
       "epoch": 0.0069180214458664825,
-      "eval_entropy": 3.4391425704956053,
-      "eval_loss": 7.116836071014404,
-      "eval_mean_token_accuracy": 0.280334330201149,
       "eval_num_tokens": 44798.0,
-      "eval_runtime": 41.2852,
-      "eval_samples_per_second": 4.844,
-      "eval_steps_per_second": 1.211,
       "step": 10
     },
     {
-      "entropy": 4.105616652965546,
       "epoch": 0.013836042891732965,
-      "grad_norm": 23.306013107299805,
       "learning_rate": 8.636363636363637e-05,
-      "loss": 5.052,
-      "mean_token_accuracy": 0.4401222452521324,
       "num_tokens": 89551.0,
       "step": 20
     },
     {
       "epoch": 0.013836042891732965,
-      "eval_entropy": 4.973556356430054,
-      "eval_loss": 2.668402671813965,
-      "eval_mean_token_accuracy": 0.648028552532196,
       "eval_num_tokens": 89551.0,
-      "eval_runtime": 41.2844,
-      "eval_samples_per_second": 4.844,
-      "eval_steps_per_second": 1.211,
       "step": 20
     },
     {
-      "entropy": 4.935999858379364,
       "epoch": 0.020754064337599448,
-      "grad_norm": 7.5827813148498535,
       "learning_rate": 0.0001318181818181818,
-      "loss": 1.8098,
-      "mean_token_accuracy": 0.7725904256105423,
       "num_tokens": 134427.0,
       "step": 30
     },
     {
       "epoch": 0.020754064337599448,
-      "eval_entropy": 4.271238183975219,
-      "eval_loss": 1.040016531944275,
-      "eval_mean_token_accuracy": 0.850863606929779,
       "eval_num_tokens": 134427.0,
-      "eval_runtime": 41.4413,
-      "eval_samples_per_second": 4.826,
-      "eval_steps_per_second": 1.207,
       "step": 30
     },
     {
-      "entropy": 3.026445063948631,
       "epoch": 0.02767208578346593,
-      "grad_norm": 5.579833984375,
       "learning_rate": 0.00017727272727272728,
-      "loss": 0.7995,
-      "mean_token_accuracy": 0.88612859249115,
       "num_tokens": 179334.0,
       "step": 40
     },
     {
       "epoch": 0.02767208578346593,
-      "eval_entropy": 1.5599463820457458,
-      "eval_loss": 0.4294031858444214,
-      "eval_mean_token_accuracy": 0.8945973372459411,
       "eval_num_tokens": 179334.0,
-      "eval_runtime": 41.2068,
-      "eval_samples_per_second": 4.854,
-      "eval_steps_per_second": 1.213,
       "step": 40
     }
   ],

   "is_world_process_zero": true,
   "log_history": [
     {
+      "entropy": 3.238903135061264,
       "epoch": 0.0069180214458664825,
+      "grad_norm": 44.88839340209961,
       "learning_rate": 4.0909090909090915e-05,
+      "loss": 8.7207,
+      "mean_token_accuracy": 0.20161552485078574,
       "num_tokens": 44798.0,
       "step": 10
     },
     {
       "epoch": 0.0069180214458664825,
+      "eval_entropy": 3.485778374671936,
+      "eval_loss": 7.185971260070801,
+      "eval_mean_token_accuracy": 0.2777047342061996,
       "eval_num_tokens": 44798.0,
+      "eval_runtime": 41.459,
+      "eval_samples_per_second": 4.824,
+      "eval_steps_per_second": 1.206,
       "step": 10
     },
     {
+      "entropy": 4.110421192646027,
       "epoch": 0.013836042891732965,
+      "grad_norm": 26.09234619140625,
       "learning_rate": 8.636363636363637e-05,
+      "loss": 5.0369,
+      "mean_token_accuracy": 0.4463097870349884,
       "num_tokens": 89551.0,
       "step": 20
     },
     {
       "epoch": 0.013836042891732965,
+      "eval_entropy": 5.049612331390381,
+      "eval_loss": 2.6804354190826416,
+      "eval_mean_token_accuracy": 0.6554659616947174,
       "eval_num_tokens": 89551.0,
+      "eval_runtime": 41.3525,
+      "eval_samples_per_second": 4.836,
+      "eval_steps_per_second": 1.209,
       "step": 20
     },
     {
+      "entropy": 4.915739822387695,
       "epoch": 0.020754064337599448,
+      "grad_norm": 6.044945240020752,
       "learning_rate": 0.0001318181818181818,
+      "loss": 1.7923,
+      "mean_token_accuracy": 0.7768757700920105,
       "num_tokens": 134427.0,
       "step": 30
     },
     {
       "epoch": 0.020754064337599448,
+      "eval_entropy": 4.307503514289856,
+      "eval_loss": 1.08396577835083,
+      "eval_mean_token_accuracy": 0.8470160067081451,
       "eval_num_tokens": 134427.0,
+      "eval_runtime": 41.6811,
+      "eval_samples_per_second": 4.798,
+      "eval_steps_per_second": 1.2,
       "step": 30
     },
     {
+      "entropy": 2.974119684100151,
       "epoch": 0.02767208578346593,
+      "grad_norm": 4.946300506591797,
       "learning_rate": 0.00017727272727272728,
+      "loss": 0.7439,
+      "mean_token_accuracy": 0.8815336391329766,
       "num_tokens": 179334.0,
       "step": 40
     },
     {
       "epoch": 0.02767208578346593,
+      "eval_entropy": 1.1884030628204345,
+      "eval_loss": 0.30163103342056274,
+      "eval_mean_token_accuracy": 0.8916239559650421,
       "eval_num_tokens": 179334.0,
+      "eval_runtime": 41.5034,
+      "eval_samples_per_second": 4.819,
+      "eval_steps_per_second": 1.205,
       "step": 40
     }
   ],

checkpoint-40/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c1bebc67618e014c760eda21fce679ce0675e3a51da68b3f33bf511148f795f1
 size 6481

 version https://git-lfs.github.com/spec/v1
+oid sha256:a7d1da37edb56f19d5d0e4a00c4a139121e78d4c28df66d5a6172229619a3e96
 size 6481