AjayP13 committed (verified)
Commit 5052746 · 1 Parent(s): f2d261f

Upload folder using huggingface_hub
ipt_fineinstructions_all_exp_chat_sft/hf/README.md CHANGED
@@ -5,9 +5,9 @@
  ```python3
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

- tokenizer = AutoTokenizer.from_pretrained('/mnt/nlpgpu-io1/data/ajayp/output/fineinstructions/dated/2025-08-21-02:05:11/data/sft_fineinstructions-pretraining_experiments-ipt_fineinstructions_all_exp_chat-hf/export_path', revision=None) # Load tokenizer
+ tokenizer = AutoTokenizer.from_pretrained('/mnt/nlpgpu-io1/data/ajayp/output/fineinstructions/dated/2025-08-21-02:05:11/data/sft_v2_fineinstructions-pretraining_experiments-ipt_fineinstructions_all_exp_chat-hf/export_path', revision=None) # Load tokenizer
  tokenizer.padding_side = 'left'
- model = AutoModelForCausalLM.from_pretrained('/mnt/nlpgpu-io1/data/ajayp/output/fineinstructions/dated/2025-08-21-02:05:11/data/sft_fineinstructions-pretraining_experiments-ipt_fineinstructions_all_exp_chat-hf/export_path', revision=None) # Load model
+ model = AutoModelForCausalLM.from_pretrained('/mnt/nlpgpu-io1/data/ajayp/output/fineinstructions/dated/2025-08-21-02:05:11/data/sft_v2_fineinstructions-pretraining_experiments-ipt_fineinstructions_all_exp_chat-hf/export_path', revision=None) # Load model
  pipe = pipeline('text-generation', model=model, tokenizer=tokenizer, pad_token_id=tokenizer.pad_token_id, return_full_text=False)

  inputs = ["<|start_header_id|>user<|end_header_id|>\nI want you to use the Titanic dataset available in the seaborn package. You are required not only to conduct Preprocessing, Exploratory Data Analysis and Prediction using this dataset, but also perform Feature Engineering. \n\nAdditionally, here is a snippet of a model trained with Logistic Regression:\n\n```python\nimport seaborn as sns\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import LogisticRegression\n\ndf = sns.load_dataset('titanic')\n\nx_train, x_test, y_train, y_test = train_test_split(\n df.drop('survived', axis=1), df['survived'], test_size=0.25, random_state=42)\n\nmodel = LogisticRegression()\nmodel.fit(x_train, y_train)\n```\n\nThe above code results in an error. Identify the error and modify the code accordingly. \n\nFurthermore, provide high-quality documentation for your code, giving detailed insights into every step of your analysis from data preprocessing, analysis to prediction. Lastly, make use of visualization tools to present the results in a more understandable manner. Your code should be efficient and well-structured so it's easily readable and maintainable.\n\n<|start_header_id|>assistant<|end_header_id|>\n"]
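For reference, a runnable sketch of the usage pattern the updated README documents. The load calls and pipeline settings mirror the README; the short prompt and the `max_new_tokens` value are illustrative assumptions, not part of the repo:

```python3
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Export path taken from the README snippet above.
model_path = '/mnt/nlpgpu-io1/data/ajayp/output/fineinstructions/dated/2025-08-21-02:05:11/data/sft_v2_fineinstructions-pretraining_experiments-ipt_fineinstructions_all_exp_chat-hf/export_path'

tokenizer = AutoTokenizer.from_pretrained(model_path, revision=None)  # Load tokenizer
tokenizer.padding_side = 'left'
model = AutoModelForCausalLM.from_pretrained(model_path, revision=None)  # Load model
pipe = pipeline('text-generation', model=model, tokenizer=tokenizer,
                pad_token_id=tokenizer.pad_token_id, return_full_text=False)

# Prompts use the chat markers shown in the README; this short prompt is illustrative.
inputs = ["<|start_header_id|>user<|end_header_id|>\nExplain what stratified train/test splitting does.\n\n<|start_header_id|>assistant<|end_header_id|>\n"]
outputs = pipe(inputs, max_new_tokens=256)  # max_new_tokens is an assumed generation setting
print(outputs[0][0]['generated_text'])  # one result list per prompt; completion only (return_full_text=False)
```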
ipt_fineinstructions_all_exp_chat_sft/hf/config.json CHANGED
@@ -1,5 +1,5 @@
  {
- "_name_or_path": "/nlpgpu/data/ajayp/.cache/huggingface_cache/hub/models--fineinstructions--pretraining_experiments/snapshots/f9823811a3bb304ad8b9d9650755d91726e680d5/ipt_fineinstructions_all_exp_chat/hf",
+ "_name_or_path": "/nlpgpu/data/ajayp/.cache/huggingface_cache/hub/models--fineinstructions--pretraining_experiments/snapshots/d439b1b70314c7d817673b9d45a57ea5f71a7c7c/ipt_fineinstructions_all_exp_chat/hf",
  "architectures": [
  "LlamaForCausalLM"
  ],
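The config change only repoints `_name_or_path` at a newer snapshot of the base `fineinstructions/pretraining_experiments` checkpoint; the architecture is unchanged. A quick sketch for inspecting those fields from a local checkout (the directory name below is an assumption):

```python3
import json

# Assumed local checkout path containing the config.json shown in this diff.
with open('ipt_fineinstructions_all_exp_chat_sft/hf/config.json') as f:
    config = json.load(f)

print(config['architectures'])   # ['LlamaForCausalLM']
print(config['_name_or_path'])   # snapshot path of the base checkpoint recorded at export time
```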
ipt_fineinstructions_all_exp_chat_sft/hf/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d437f593933e293af576bbceedc06e14d37c97acd832e1332d7059730d8e2477
+ oid sha256:3485c04ce78a27ac17ee458d4f25657504b0b086759740becdbb8ba305cc64d7
  size 3619919680
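`model.safetensors` is stored through Git LFS, so the diff only updates the pointer file; the new weights show up as a new `oid`, while `size` is unchanged. A sketch for checking that a downloaded copy matches the new pointer (the local path is an assumption):

```python3
import hashlib
from pathlib import Path

# Assumed local path to the downloaded weights file.
weights = Path('ipt_fineinstructions_all_exp_chat_sft/hf/model.safetensors')

sha256 = hashlib.sha256()
with weights.open('rb') as f:
    for chunk in iter(lambda: f.read(1 << 20), b''):  # hash in 1 MiB chunks; the file is ~3.6 GB
        sha256.update(chunk)

print(sha256.hexdigest() == '3485c04ce78a27ac17ee458d4f25657504b0b086759740becdbb8ba305cc64d7')  # oid from the new pointer
print(weights.stat().st_size == 3619919680)  # size from the pointer (unchanged by this commit)
```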
ipt_fineinstructions_all_exp_chat_sft/hf/training_args.json CHANGED
@@ -1,5 +1,5 @@
  {
- "output_dir": "/mnt/nlpgpu-io1/data/ajayp/output/fineinstructions/dated/2025-08-21-02:05:11/data/sft_fineinstructions-pretraining_experiments-ipt_fineinstructions_all_exp_chat-hf/post-train-sft/_checkpoints",
+ "output_dir": "/mnt/nlpgpu-io1/data/ajayp/output/fineinstructions/dated/2025-08-21-02:05:11/data/sft_v2_fineinstructions-pretraining_experiments-ipt_fineinstructions_all_exp_chat-hf/post-train-sft/_checkpoints",
  "overwrite_output_dir": false,
  "do_train": false,
  "do_eval": true,
@@ -14,7 +14,7 @@
  "eval_accumulation_steps": 1,
  "eval_delay": 0,
  "torch_empty_cache_steps": null,
- "learning_rate": 0.0001,
+ "learning_rate": 0.001,
  "weight_decay": 0.01,
  "adam_beta1": 0.9,
  "adam_beta2": 0.999,
@@ -22,14 +22,16 @@
  "max_grad_norm": 1.0,
  "num_train_epochs": 30,
  "max_steps": -1,
- "lr_scheduler_type": "constant",
- "lr_scheduler_kwargs": {},
+ "lr_scheduler_type": "cosine_with_restarts",
+ "lr_scheduler_kwargs": {
+   "num_cycles": 5
+ },
  "warmup_ratio": 0.0,
- "warmup_steps": 0,
+ "warmup_steps": 8000,
  "log_level": "passive",
  "log_level_replica": "warning",
  "log_on_each_node": true,
- "logging_dir": "/mnt/nlpgpu-io1/data/ajayp/output/fineinstructions/dated/2025-08-21-02:05:11/data/sft_fineinstructions-pretraining_experiments-ipt_fineinstructions_all_exp_chat-hf/post-train-sft/_checkpoints/runs/Aug21_03-06-43_nlpgpu06.seas.upenn.edu",
+ "logging_dir": "/mnt/nlpgpu-io1/data/ajayp/output/fineinstructions/dated/2025-08-21-02:05:11/data/sft_v2_fineinstructions-pretraining_experiments-ipt_fineinstructions_all_exp_chat-hf/post-train-sft/_checkpoints/runs/Aug21_18-32-45_nlpgpu06.seas.upenn.edu",
  "logging_strategy": "steps",
  "logging_first_step": false,
  "logging_steps": 1,