nRuaif committed on Aug 25, 2023

Commit

c4fc286

1 Parent(s): 846dde2

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.ipynb_checkpoints/Untitled-checkpoint.ipynb +6 -0
Untitled.ipynb +50 -0
adapter_config.json +26 -0
added_tokens.json +3 -0
checkpoint-10/README.md +34 -0
checkpoint-10/adapter_config.json +26 -0
checkpoint-10/adapter_model.bin +3 -0
checkpoint-10/adapter_model/README.md +34 -0
checkpoint-10/adapter_model/adapter_config.json +26 -0
checkpoint-10/adapter_model/adapter_model.bin +3 -0
checkpoint-10/optimizer.pt +3 -0
checkpoint-10/rng_state.pth +3 -0
checkpoint-10/scheduler.pt +3 -0
checkpoint-10/trainer_state.json +87 -0
checkpoint-10/training_args.bin +3 -0
checkpoint-20/README.md +21 -0
checkpoint-20/adapter_config.json +26 -0
checkpoint-20/adapter_model.bin +3 -0
checkpoint-20/adapter_model/README.md +21 -0
checkpoint-20/adapter_model/adapter_config.json +26 -0
checkpoint-20/adapter_model/adapter_model.bin +3 -0
checkpoint-20/optimizer.pt +3 -0
checkpoint-20/rng_state.pth +3 -0
checkpoint-20/scheduler.pt +3 -0
checkpoint-20/trainer_state.json +155 -0
checkpoint-20/training_args.bin +3 -0
checkpoint-30/README.md +21 -0
checkpoint-30/adapter_config.json +26 -0
checkpoint-30/adapter_model.bin +3 -0
checkpoint-30/adapter_model/README.md +21 -0
checkpoint-30/adapter_model/adapter_config.json +26 -0
checkpoint-30/adapter_model/adapter_model.bin +3 -0
checkpoint-30/optimizer.pt +3 -0
checkpoint-30/rng_state.pth +3 -0
checkpoint-30/scheduler.pt +3 -0
checkpoint-30/trainer_state.json +223 -0
checkpoint-30/training_args.bin +3 -0
checkpoint-40/README.md +21 -0
checkpoint-40/adapter_config.json +26 -0
checkpoint-40/adapter_model.bin +3 -0
checkpoint-40/adapter_model/README.md +21 -0
checkpoint-40/adapter_model/adapter_config.json +26 -0
checkpoint-40/adapter_model/adapter_model.bin +3 -0
checkpoint-40/optimizer.pt +3 -0
checkpoint-40/rng_state.pth +3 -0
checkpoint-40/scheduler.pt +3 -0
checkpoint-40/trainer_state.json +291 -0
checkpoint-40/training_args.bin +3 -0
checkpoint-50/README.md +21 -0
checkpoint-50/adapter_config.json +26 -0

.ipynb_checkpoints/Untitled-checkpoint.ipynb ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+ "cells": [],
+ "metadata": {},
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

Untitled.ipynb ADDED Viewed

	@@ -0,0 +1,50 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "127668ee-44f0-4438-9337-e7c4a486aea3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from huggingface_hub import HfApi\n",
+    "\n",
+    "api = HfApi()\n",
+    "\n",
+    "# Upload all the content from the local folder to the remote dataset repo.\n",
+    "# By default, files are uploaded at the root of the repo\n",
+    "\n",
+    "api.upload_folder(\n",
+    "\n",
+    "    folder_path=r\"C:\\dataset\\New folder\",\n",
+    "\n",
+    "    repo_id=\"MinervaAI/Random-roleplay-instruction\",\n",
+    "\n",
+    "    repo_type=\"dataset\",\n",
+    "\n",
+    ")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

adapter_config.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "NousResearch/Llama-2-13b-hf",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": false,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj",
+    "up_proj",
+    "gate_proj",
+    "o_proj",
+    "k_proj",
+    "down_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "<pad>": 32000
+}

checkpoint-10/README.md ADDED Viewed

	@@ -0,0 +1,34 @@

+---
+library_name: peft
+---
+## Training procedure
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+- PEFT 0.6.0.dev0
+- PEFT 0.6.0.dev0

checkpoint-10/adapter_config.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "NousResearch/Llama-2-13b-hf",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj",
+    "up_proj",
+    "gate_proj",
+    "o_proj",
+    "k_proj",
+    "down_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

checkpoint-10/adapter_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8d73dd5f0590e7395a03feab2c12262e8144fdb10f84d6cc08f5c2b521c7d832
+size 62788109

checkpoint-10/adapter_model/README.md ADDED Viewed

	@@ -0,0 +1,34 @@

+---
+library_name: peft
+---
+## Training procedure
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+- PEFT 0.6.0.dev0
+- PEFT 0.6.0.dev0

checkpoint-10/adapter_model/adapter_config.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "NousResearch/Llama-2-13b-hf",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj",
+    "up_proj",
+    "gate_proj",
+    "o_proj",
+    "k_proj",
+    "down_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

checkpoint-10/adapter_model/adapter_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8d73dd5f0590e7395a03feab2c12262e8144fdb10f84d6cc08f5c2b521c7d832
+size 62788109

checkpoint-10/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:93fc15492ebf7acd0d7da2effdc336db5cf94da3d033e4bfcba017a5ba9ff578
+size 250681597

checkpoint-10/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b8d0d39ee3f495298b1364d1a694b15161510d7f6b4a57e4ad295a4590655a19
+size 14575

checkpoint-10/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e3f23bc239cbe3e7a4f862f2448a71aa49fc3b361c6c5866cc00242a779c0be4
+size 627

checkpoint-10/trainer_state.json ADDED Viewed

	@@ -0,0 +1,87 @@

+{
+  "best_metric": 2.098437547683716,
+  "best_model_checkpoint": "./qlora-out-kimiko-test2/checkpoint-10",
+  "epoch": 0.25848142164781907,
+  "eval_steps": 10,
+  "global_step": 10,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.03,
+      "learning_rate": 1e-05,
+      "loss": 1.5707,
+      "step": 1
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 2e-05,
+      "loss": 1.5621,
+      "step": 2
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 3e-05,
+      "loss": 1.4812,
+      "step": 3
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 4e-05,
+      "loss": 1.5197,
+      "step": 4
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 5e-05,
+      "loss": 1.5567,
+      "step": 5
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 5e-05,
+      "loss": 1.4645,
+      "step": 6
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 5e-05,
+      "loss": 1.6122,
+      "step": 7
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 5e-05,
+      "loss": 1.5596,
+      "step": 8
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 5e-05,
+      "loss": 1.5608,
+      "step": 9
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 5e-05,
+      "loss": 1.5456,
+      "step": 10
+    },
+    {
+      "epoch": 0.26,
+      "eval_loss": 2.098437547683716,
+      "eval_runtime": 119.6161,
+      "eval_samples_per_second": 1.555,
+      "eval_steps_per_second": 0.777,
+      "step": 10
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 114,
+  "num_train_epochs": 3,
+  "save_steps": 10,
+  "total_flos": 4.582587092041728e+16,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-10/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ca362c6e6a1bbe2523b2190a501c92d6dbb3db6186bef551619d83852cca3df1
+size 4219

checkpoint-20/README.md ADDED Viewed

	@@ -0,0 +1,21 @@

+---
+library_name: peft
+---
+## Training procedure
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+- PEFT 0.6.0.dev0

checkpoint-20/adapter_config.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "NousResearch/Llama-2-13b-hf",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj",
+    "up_proj",
+    "gate_proj",
+    "o_proj",
+    "k_proj",
+    "down_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

checkpoint-20/adapter_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:03d0c38222a997a52bfe76857917321fb2bc8d10553b4da4ff8390643c25f962
+size 62788109

checkpoint-20/adapter_model/README.md ADDED Viewed

	@@ -0,0 +1,21 @@

+---
+library_name: peft
+---
+## Training procedure
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+- PEFT 0.6.0.dev0

checkpoint-20/adapter_model/adapter_config.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "NousResearch/Llama-2-13b-hf",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj",
+    "up_proj",
+    "gate_proj",
+    "o_proj",
+    "k_proj",
+    "down_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

checkpoint-20/adapter_model/adapter_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:03d0c38222a997a52bfe76857917321fb2bc8d10553b4da4ff8390643c25f962
+size 62788109

checkpoint-20/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:56e8c6c5c1d0e07913575e95580988f8c4d4d451e12e22619c79c5df6b48dc53
+size 250681597

checkpoint-20/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6cf66b0a499885f833314015600150cb6bfa74e1505e5608890c7c4ba655a6ba
+size 14575

checkpoint-20/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e64bd2871a807ae2da11c5073d611b4d4223c336499ef21e9e856aaa448a1a35
+size 627

checkpoint-20/trainer_state.json ADDED Viewed

	@@ -0,0 +1,155 @@

+{
+  "best_metric": 2.066981077194214,
+  "best_model_checkpoint": "./qlora-out-kimiko-test2/checkpoint-20",
+  "epoch": 0.5169628432956381,
+  "eval_steps": 10,
+  "global_step": 20,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.03,
+      "learning_rate": 1e-05,
+      "loss": 1.5707,
+      "step": 1
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 2e-05,
+      "loss": 1.5621,
+      "step": 2
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 3e-05,
+      "loss": 1.4812,
+      "step": 3
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 4e-05,
+      "loss": 1.5197,
+      "step": 4
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 5e-05,
+      "loss": 1.5567,
+      "step": 5
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 5e-05,
+      "loss": 1.4645,
+      "step": 6
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 5e-05,
+      "loss": 1.6122,
+      "step": 7
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 5e-05,
+      "loss": 1.5596,
+      "step": 8
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 5e-05,
+      "loss": 1.5608,
+      "step": 9
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 5e-05,
+      "loss": 1.5456,
+      "step": 10
+    },
+    {
+      "epoch": 0.26,
+      "eval_loss": 2.098437547683716,
+      "eval_runtime": 119.6161,
+      "eval_samples_per_second": 1.555,
+      "eval_steps_per_second": 0.777,
+      "step": 10
+    },
+    {
+      "epoch": 0.28,
+      "learning_rate": 5e-05,
+      "loss": 1.5645,
+      "step": 11
+    },
+    {
+      "epoch": 0.31,
+      "learning_rate": 5e-05,
+      "loss": 1.538,
+      "step": 12
+    },
+    {
+      "epoch": 0.34,
+      "learning_rate": 5e-05,
+      "loss": 1.6388,
+      "step": 13
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 5e-05,
+      "loss": 1.4943,
+      "step": 14
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 5e-05,
+      "loss": 1.5469,
+      "step": 15
+    },
+    {
+      "epoch": 0.41,
+      "learning_rate": 5e-05,
+      "loss": 1.6149,
+      "step": 16
+    },
+    {
+      "epoch": 0.44,
+      "learning_rate": 5e-05,
+      "loss": 1.5345,
+      "step": 17
+    },
+    {
+      "epoch": 0.47,
+      "learning_rate": 5e-05,
+      "loss": 1.4903,
+      "step": 18
+    },
+    {
+      "epoch": 0.49,
+      "learning_rate": 5e-05,
+      "loss": 1.5499,
+      "step": 19
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 5e-05,
+      "loss": 1.5934,
+      "step": 20
+    },
+    {
+      "epoch": 0.52,
+      "eval_loss": 2.066981077194214,
+      "eval_runtime": 119.5781,
+      "eval_samples_per_second": 1.555,
+      "eval_steps_per_second": 0.778,
+      "step": 20
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 114,
+  "num_train_epochs": 3,
+  "save_steps": 10,
+  "total_flos": 9.195307914756096e+16,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-20/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ca362c6e6a1bbe2523b2190a501c92d6dbb3db6186bef551619d83852cca3df1
+size 4219

checkpoint-30/README.md ADDED Viewed

	@@ -0,0 +1,21 @@

+---
+library_name: peft
+---
+## Training procedure
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+- PEFT 0.6.0.dev0

checkpoint-30/adapter_config.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "NousResearch/Llama-2-13b-hf",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj",
+    "up_proj",
+    "gate_proj",
+    "o_proj",
+    "k_proj",
+    "down_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

checkpoint-30/adapter_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4276aa46f33d393edc2d19308e186a1f07580d76eddb0bb6339bc4f3e80b9f58
+size 62788109

checkpoint-30/adapter_model/README.md ADDED Viewed

	@@ -0,0 +1,21 @@

+---
+library_name: peft
+---
+## Training procedure
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+- PEFT 0.6.0.dev0

checkpoint-30/adapter_model/adapter_config.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "NousResearch/Llama-2-13b-hf",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj",
+    "up_proj",
+    "gate_proj",
+    "o_proj",
+    "k_proj",
+    "down_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

checkpoint-30/adapter_model/adapter_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4276aa46f33d393edc2d19308e186a1f07580d76eddb0bb6339bc4f3e80b9f58
+size 62788109

checkpoint-30/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b18fc6beca12122a3d4309af96449623e23a0d54f4b1c5088810cb8933a25809
+size 250681597

checkpoint-30/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cbf64f9f720011db899c09efa51061d9067d83e6fc2235e5bb9d7087d72402fd
+size 14575

checkpoint-30/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:298c19c2d0215ea63da7419132c9dac6c9c75fdd531e32dfc1cb87dbb0aa8259
+size 627

checkpoint-30/trainer_state.json ADDED Viewed

	@@ -0,0 +1,223 @@

+{
+  "best_metric": 2.046339750289917,
+  "best_model_checkpoint": "./qlora-out-kimiko-test2/checkpoint-30",
+  "epoch": 0.7754442649434572,
+  "eval_steps": 10,
+  "global_step": 30,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.03,
+      "learning_rate": 1e-05,
+      "loss": 1.5707,
+      "step": 1
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 2e-05,
+      "loss": 1.5621,
+      "step": 2
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 3e-05,
+      "loss": 1.4812,
+      "step": 3
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 4e-05,
+      "loss": 1.5197,
+      "step": 4
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 5e-05,
+      "loss": 1.5567,
+      "step": 5
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 5e-05,
+      "loss": 1.4645,
+      "step": 6
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 5e-05,
+      "loss": 1.6122,
+      "step": 7
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 5e-05,
+      "loss": 1.5596,
+      "step": 8
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 5e-05,
+      "loss": 1.5608,
+      "step": 9
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 5e-05,
+      "loss": 1.5456,
+      "step": 10
+    },
+    {
+      "epoch": 0.26,
+      "eval_loss": 2.098437547683716,
+      "eval_runtime": 119.6161,
+      "eval_samples_per_second": 1.555,
+      "eval_steps_per_second": 0.777,
+      "step": 10
+    },
+    {
+      "epoch": 0.28,
+      "learning_rate": 5e-05,
+      "loss": 1.5645,
+      "step": 11
+    },
+    {
+      "epoch": 0.31,
+      "learning_rate": 5e-05,
+      "loss": 1.538,
+      "step": 12
+    },
+    {
+      "epoch": 0.34,
+      "learning_rate": 5e-05,
+      "loss": 1.6388,
+      "step": 13
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 5e-05,
+      "loss": 1.4943,
+      "step": 14
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 5e-05,
+      "loss": 1.5469,
+      "step": 15
+    },
+    {
+      "epoch": 0.41,
+      "learning_rate": 5e-05,
+      "loss": 1.6149,
+      "step": 16
+    },
+    {
+      "epoch": 0.44,
+      "learning_rate": 5e-05,
+      "loss": 1.5345,
+      "step": 17
+    },
+    {
+      "epoch": 0.47,
+      "learning_rate": 5e-05,
+      "loss": 1.4903,
+      "step": 18
+    },
+    {
+      "epoch": 0.49,
+      "learning_rate": 5e-05,
+      "loss": 1.5499,
+      "step": 19
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 5e-05,
+      "loss": 1.5934,
+      "step": 20
+    },
+    {
+      "epoch": 0.52,
+      "eval_loss": 2.066981077194214,
+      "eval_runtime": 119.5781,
+      "eval_samples_per_second": 1.555,
+      "eval_steps_per_second": 0.778,
+      "step": 20
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 5e-05,
+      "loss": 1.4554,
+      "step": 21
+    },
+    {
+      "epoch": 0.57,
+      "learning_rate": 5e-05,
+      "loss": 1.5512,
+      "step": 22
+    },
+    {
+      "epoch": 0.59,
+      "learning_rate": 5e-05,
+      "loss": 1.4636,
+      "step": 23
+    },
+    {
+      "epoch": 0.62,
+      "learning_rate": 5e-05,
+      "loss": 1.5398,
+      "step": 24
+    },
+    {
+      "epoch": 0.65,
+      "learning_rate": 5e-05,
+      "loss": 1.5623,
+      "step": 25
+    },
+    {
+      "epoch": 0.67,
+      "learning_rate": 5e-05,
+      "loss": 1.4658,
+      "step": 26
+    },
+    {
+      "epoch": 0.7,
+      "learning_rate": 5e-05,
+      "loss": 1.4723,
+      "step": 27
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 5e-05,
+      "loss": 1.432,
+      "step": 28
+    },
+    {
+      "epoch": 0.75,
+      "learning_rate": 5e-05,
+      "loss": 1.4814,
+      "step": 29
+    },
+    {
+      "epoch": 0.78,
+      "learning_rate": 5e-05,
+      "loss": 1.4924,
+      "step": 30
+    },
+    {
+      "epoch": 0.78,
+      "eval_loss": 2.046339750289917,
+      "eval_runtime": 119.5771,
+      "eval_samples_per_second": 1.555,
+      "eval_steps_per_second": 0.778,
+      "step": 30
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 114,
+  "num_train_epochs": 3,
+  "save_steps": 10,
+  "total_flos": 1.3777643892375552e+17,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-30/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ca362c6e6a1bbe2523b2190a501c92d6dbb3db6186bef551619d83852cca3df1
+size 4219

checkpoint-40/README.md ADDED Viewed

	@@ -0,0 +1,21 @@

+---
+library_name: peft
+---
+## Training procedure
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+- PEFT 0.6.0.dev0

checkpoint-40/adapter_config.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "NousResearch/Llama-2-13b-hf",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj",
+    "up_proj",
+    "gate_proj",
+    "o_proj",
+    "k_proj",
+    "down_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

checkpoint-40/adapter_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:12f5a629e5ff4db207707d71a1c324a97283a0a371cbd435fb090ea711a9e21c
+size 62788109

checkpoint-40/adapter_model/README.md ADDED Viewed

	@@ -0,0 +1,21 @@

+---
+library_name: peft
+---
+## Training procedure
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+- PEFT 0.6.0.dev0

checkpoint-40/adapter_model/adapter_config.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "NousResearch/Llama-2-13b-hf",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj",
+    "up_proj",
+    "gate_proj",
+    "o_proj",
+    "k_proj",
+    "down_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

checkpoint-40/adapter_model/adapter_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:12f5a629e5ff4db207707d71a1c324a97283a0a371cbd435fb090ea711a9e21c
+size 62788109

checkpoint-40/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d8c0a407654387b576e7ec5d641bcd7961201b07a96d39e22e3e9d77e541a513
+size 250681597

checkpoint-40/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6ef3dbc703c54e9cba3cb628e0e6ae4c7f37310ab3398f30c0c5503dcb749f77
+size 14575

checkpoint-40/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9d048c63c660c9f2cb6345c4f756c8abee0e570482b06bf9c08827d79b257394
+size 627

checkpoint-40/trainer_state.json ADDED Viewed

	@@ -0,0 +1,291 @@

+{
+  "best_metric": 2.0354697704315186,
+  "best_model_checkpoint": "./qlora-out-kimiko-test2/checkpoint-40",
+  "epoch": 1.0339256865912763,
+  "eval_steps": 10,
+  "global_step": 40,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.03,
+      "learning_rate": 1e-05,
+      "loss": 1.5707,
+      "step": 1
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 2e-05,
+      "loss": 1.5621,
+      "step": 2
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 3e-05,
+      "loss": 1.4812,
+      "step": 3
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 4e-05,
+      "loss": 1.5197,
+      "step": 4
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 5e-05,
+      "loss": 1.5567,
+      "step": 5
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 5e-05,
+      "loss": 1.4645,
+      "step": 6
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 5e-05,
+      "loss": 1.6122,
+      "step": 7
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 5e-05,
+      "loss": 1.5596,
+      "step": 8
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 5e-05,
+      "loss": 1.5608,
+      "step": 9
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 5e-05,
+      "loss": 1.5456,
+      "step": 10
+    },
+    {
+      "epoch": 0.26,
+      "eval_loss": 2.098437547683716,
+      "eval_runtime": 119.6161,
+      "eval_samples_per_second": 1.555,
+      "eval_steps_per_second": 0.777,
+      "step": 10
+    },
+    {
+      "epoch": 0.28,
+      "learning_rate": 5e-05,
+      "loss": 1.5645,
+      "step": 11
+    },
+    {
+      "epoch": 0.31,
+      "learning_rate": 5e-05,
+      "loss": 1.538,
+      "step": 12
+    },
+    {
+      "epoch": 0.34,
+      "learning_rate": 5e-05,
+      "loss": 1.6388,
+      "step": 13
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 5e-05,
+      "loss": 1.4943,
+      "step": 14
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 5e-05,
+      "loss": 1.5469,
+      "step": 15
+    },
+    {
+      "epoch": 0.41,
+      "learning_rate": 5e-05,
+      "loss": 1.6149,
+      "step": 16
+    },
+    {
+      "epoch": 0.44,
+      "learning_rate": 5e-05,
+      "loss": 1.5345,
+      "step": 17
+    },
+    {
+      "epoch": 0.47,
+      "learning_rate": 5e-05,
+      "loss": 1.4903,
+      "step": 18
+    },
+    {
+      "epoch": 0.49,
+      "learning_rate": 5e-05,
+      "loss": 1.5499,
+      "step": 19
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 5e-05,
+      "loss": 1.5934,
+      "step": 20
+    },
+    {
+      "epoch": 0.52,
+      "eval_loss": 2.066981077194214,
+      "eval_runtime": 119.5781,
+      "eval_samples_per_second": 1.555,
+      "eval_steps_per_second": 0.778,
+      "step": 20
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 5e-05,
+      "loss": 1.4554,
+      "step": 21
+    },
+    {
+      "epoch": 0.57,
+      "learning_rate": 5e-05,
+      "loss": 1.5512,
+      "step": 22
+    },
+    {
+      "epoch": 0.59,
+      "learning_rate": 5e-05,
+      "loss": 1.4636,
+      "step": 23
+    },
+    {
+      "epoch": 0.62,
+      "learning_rate": 5e-05,
+      "loss": 1.5398,
+      "step": 24
+    },
+    {
+      "epoch": 0.65,
+      "learning_rate": 5e-05,
+      "loss": 1.5623,
+      "step": 25
+    },
+    {
+      "epoch": 0.67,
+      "learning_rate": 5e-05,
+      "loss": 1.4658,
+      "step": 26
+    },
+    {
+      "epoch": 0.7,
+      "learning_rate": 5e-05,
+      "loss": 1.4723,
+      "step": 27
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 5e-05,
+      "loss": 1.432,
+      "step": 28
+    },
+    {
+      "epoch": 0.75,
+      "learning_rate": 5e-05,
+      "loss": 1.4814,
+      "step": 29
+    },
+    {
+      "epoch": 0.78,
+      "learning_rate": 5e-05,
+      "loss": 1.4924,
+      "step": 30
+    },
+    {
+      "epoch": 0.78,
+      "eval_loss": 2.046339750289917,
+      "eval_runtime": 119.5771,
+      "eval_samples_per_second": 1.555,
+      "eval_steps_per_second": 0.778,
+      "step": 30
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 5e-05,
+      "loss": 1.5809,
+      "step": 31
+    },
+    {
+      "epoch": 0.83,
+      "learning_rate": 5e-05,
+      "loss": 1.4803,
+      "step": 32
+    },
+    {
+      "epoch": 0.85,
+      "learning_rate": 5e-05,
+      "loss": 1.4878,
+      "step": 33
+    },
+    {
+      "epoch": 0.88,
+      "learning_rate": 5e-05,
+      "loss": 1.3871,
+      "step": 34
+    },
+    {
+      "epoch": 0.9,
+      "learning_rate": 5e-05,
+      "loss": 1.5151,
+      "step": 35
+    },
+    {
+      "epoch": 0.93,
+      "learning_rate": 5e-05,
+      "loss": 1.4212,
+      "step": 36
+    },
+    {
+      "epoch": 0.96,
+      "learning_rate": 5e-05,
+      "loss": 1.6284,
+      "step": 37
+    },
+    {
+      "epoch": 0.98,
+      "learning_rate": 5e-05,
+      "loss": 1.5002,
+      "step": 38
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 5e-05,
+      "loss": 1.4452,
+      "step": 39
+    },
+    {
+      "epoch": 1.03,
+      "learning_rate": 5e-05,
+      "loss": 1.4399,
+      "step": 40
+    },
+    {
+      "epoch": 1.03,
+      "eval_loss": 2.0354697704315186,
+      "eval_runtime": 119.5875,
+      "eval_samples_per_second": 1.555,
+      "eval_steps_per_second": 0.778,
+      "step": 40
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 114,
+  "num_train_epochs": 3,
+  "save_steps": 10,
+  "total_flos": 1.8296447921160192e+17,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-40/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ca362c6e6a1bbe2523b2190a501c92d6dbb3db6186bef551619d83852cca3df1
+size 4219

checkpoint-50/README.md ADDED Viewed

	@@ -0,0 +1,21 @@

+---
+library_name: peft
+---
+## Training procedure
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+- PEFT 0.6.0.dev0

checkpoint-50/adapter_config.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "NousResearch/Llama-2-13b-hf",
+  "bias": "none",
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj",
+    "up_proj",
+    "gate_proj",
+    "o_proj",
+    "k_proj",
+    "down_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}