AjayP13 committed (verified)
Commit 5052746 · 1 Parent(s): f2d261f

Upload folder using huggingface_hub
ipt_fineinstructions_all_exp_chat_sft/hf/README.md CHANGED
@@ -5,9 +5,9 @@
  ```python3
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

- tokenizer = AutoTokenizer.from_pretrained('/mnt/nlpgpu-io1/data/ajayp/output/fineinstructions/dated/2025-08-21-02:05:11/data/sft_fineinstructions-pretraining_experiments-ipt_fineinstructions_all_exp_chat-hf/export_path', revision=None) # Load tokenizer
+ tokenizer = AutoTokenizer.from_pretrained('/mnt/nlpgpu-io1/data/ajayp/output/fineinstructions/dated/2025-08-21-02:05:11/data/sft_v2_fineinstructions-pretraining_experiments-ipt_fineinstructions_all_exp_chat-hf/export_path', revision=None) # Load tokenizer
  tokenizer.padding_side = 'left'
- model = AutoModelForCausalLM.from_pretrained('/mnt/nlpgpu-io1/data/ajayp/output/fineinstructions/dated/2025-08-21-02:05:11/data/sft_fineinstructions-pretraining_experiments-ipt_fineinstructions_all_exp_chat-hf/export_path', revision=None) # Load model
+ model = AutoModelForCausalLM.from_pretrained('/mnt/nlpgpu-io1/data/ajayp/output/fineinstructions/dated/2025-08-21-02:05:11/data/sft_v2_fineinstructions-pretraining_experiments-ipt_fineinstructions_all_exp_chat-hf/export_path', revision=None) # Load model
  pipe = pipeline('text-generation', model=model, tokenizer=tokenizer, pad_token_id=tokenizer.pad_token_id, return_full_text=False)

  inputs = ["<|start_header_id|>user<|end_header_id|>\nI want you to use the Titanic dataset available in the seaborn package. You are required not only to conduct Preprocessing, Exploratory Data Analysis and Prediction using this dataset, but also perform Feature Engineering. \n\nAdditionally, here is a snippet of a model trained with Logistic Regression:\n\n```python\nimport seaborn as sns\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import LogisticRegression\n\ndf = sns.load_dataset('titanic')\n\nx_train, x_test, y_train, y_test = train_test_split(\n df.drop('survived', axis=1), df['survived'], test_size=0.25, random_state=42)\n\nmodel = LogisticRegression()\nmodel.fit(x_train, y_train)\n```\n\nThe above code results in an error. Identify the error and modify the code accordingly. \n\nFurthermore, provide high-quality documentation for your code, giving detailed insights into every step of your analysis from data preprocessing, analysis to prediction. Lastly, make use of visualization tools to present the results in a more understandable manner. Your code should be efficient and well-structured so it's easily readable and maintainable.\n\n<|start_header_id|>assistant<|end_header_id|>\n"]
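For reference, a runnable sketch of the usage pattern the updated README documents. The load calls and pipeline settings mirror the README; the short prompt and the `max_new_tokens` value are illustrative assumptions, not part of the repo:

```python3
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Export path taken from the README snippet above.
model_path = '/mnt/nlpgpu-io1/data/ajayp/output/fineinstructions/dated/2025-08-21-02:05:11/data/sft_v2_fineinstructions-pretraining_experiments-ipt_fineinstructions_all_exp_chat-hf/export_path'

tokenizer = AutoTokenizer.from_pretrained(model_path, revision=None)  # Load tokenizer
tokenizer.padding_side = 'left'
model = AutoModelForCausalLM.from_pretrained(model_path, revision=None)  # Load model
pipe = pipeline('text-generation', model=model, tokenizer=tokenizer,
                pad_token_id=tokenizer.pad_token_id, return_full_text=False)

# Prompts use the chat markers shown in the README; this short prompt is illustrative.
inputs = ["<|start_header_id|>user<|end_header_id|>\nExplain what stratified train/test splitting does.\n\n<|start_header_id|>assistant<|end_header_id|>\n"]
outputs = pipe(inputs, max_new_tokens=256)  # max_new_tokens is an assumed generation setting
print(outputs[0][0]['generated_text'])  # one result list per prompt; completion only (return_full_text=False)
```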
ipt_fineinstructions_all_exp_chat_sft/hf/config.json CHANGED
@@ -1,5 +1,5 @@
  {
- "_name_or_path": "/nlpgpu/data/ajayp/.cache/huggingface_cache/hub/models--fineinstructions--pretraining_experiments/snapshots/f9823811a3bb304ad8b9d9650755d91726e680d5/ipt_fineinstructions_all_exp_chat/hf",
+ "_name_or_path": "/nlpgpu/data/ajayp/.cache/huggingface_cache/hub/models--fineinstructions--pretraining_experiments/snapshots/d439b1b70314c7d817673b9d45a57ea5f71a7c7c/ipt_fineinstructions_all_exp_chat/hf",
  "architectures": [
  "LlamaForCausalLM"
  ],
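The config change only repoints `_name_or_path` at a newer snapshot of the base `fineinstructions/pretraining_experiments` checkpoint; the architecture is unchanged. A quick sketch for inspecting those fields from a local checkout (the directory name below is an assumption):

```python3
import json

# Assumed local checkout path containing the config.json shown in this diff.
with open('ipt_fineinstructions_all_exp_chat_sft/hf/config.json') as f:
    config = json.load(f)

print(config['architectures'])   # ['LlamaForCausalLM']
print(config['_name_or_path'])   # snapshot path of the base checkpoint recorded at export time
```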
ipt_fineinstructions_all_exp_chat_sft/hf/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d437f593933e293af576bbceedc06e14d37c97acd832e1332d7059730d8e2477
+ oid sha256:3485c04ce78a27ac17ee458d4f25657504b0b086759740becdbb8ba305cc64d7
  size 3619919680
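`model.safetensors` is stored through Git LFS, so the diff only updates the pointer file; the new weights show up as a new `oid`, while `size` is unchanged. A sketch for checking that a downloaded copy matches the new pointer (the local path is an assumption):

```python3
import hashlib
from pathlib import Path

# Assumed local path to the downloaded weights file.
weights = Path('ipt_fineinstructions_all_exp_chat_sft/hf/model.safetensors')

sha256 = hashlib.sha256()
with weights.open('rb') as f:
    for chunk in iter(lambda: f.read(1 << 20), b''):  # hash in 1 MiB chunks; the file is ~3.6 GB
        sha256.update(chunk)

print(sha256.hexdigest() == '3485c04ce78a27ac17ee458d4f25657504b0b086759740becdbb8ba305cc64d7')  # oid from the new pointer
print(weights.stat().st_size == 3619919680)  # size from the pointer (unchanged by this commit)
```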
ipt_fineinstructions_all_exp_chat_sft/hf/training_args.json CHANGED
@@ -1,5 +1,5 @@
  {
- "output_dir": "/mnt/nlpgpu-io1/data/ajayp/output/fineinstructions/dated/2025-08-21-02:05:11/data/sft_fineinstructions-pretraining_experiments-ipt_fineinstructions_all_exp_chat-hf/post-train-sft/_checkpoints",
+ "output_dir": "/mnt/nlpgpu-io1/data/ajayp/output/fineinstructions/dated/2025-08-21-02:05:11/data/sft_v2_fineinstructions-pretraining_experiments-ipt_fineinstructions_all_exp_chat-hf/post-train-sft/_checkpoints",
  "overwrite_output_dir": false,
  "do_train": false,
  "do_eval": true,
@@ -14,7 +14,7 @@
  "eval_accumulation_steps": 1,
  "eval_delay": 0,
  "torch_empty_cache_steps": null,
- "learning_rate": 0.0001,
+ "learning_rate": 0.001,
  "weight_decay": 0.01,
  "adam_beta1": 0.9,
  "adam_beta2": 0.999,
@@ -22,14 +22,16 @@
  "max_grad_norm": 1.0,
  "num_train_epochs": 30,
  "max_steps": -1,
- "lr_scheduler_type": "constant",
- "lr_scheduler_kwargs": {},
+ "lr_scheduler_type": "cosine_with_restarts",
+ "lr_scheduler_kwargs": {
+   "num_cycles": 5
+ },
  "warmup_ratio": 0.0,
- "warmup_steps": 0,
+ "warmup_steps": 8000,
  "log_level": "passive",
  "log_level_replica": "warning",
  "log_on_each_node": true,
- "logging_dir": "/mnt/nlpgpu-io1/data/ajayp/output/fineinstructions/dated/2025-08-21-02:05:11/data/sft_fineinstructions-pretraining_experiments-ipt_fineinstructions_all_exp_chat-hf/post-train-sft/_checkpoints/runs/Aug21_03-06-43_nlpgpu06.seas.upenn.edu",
+ "logging_dir": "/mnt/nlpgpu-io1/data/ajayp/output/fineinstructions/dated/2025-08-21-02:05:11/data/sft_v2_fineinstructions-pretraining_experiments-ipt_fineinstructions_all_exp_chat-hf/post-train-sft/_checkpoints/runs/Aug21_18-32-45_nlpgpu06.seas.upenn.edu",
  "logging_strategy": "steps",
  "logging_first_step": false,
  "logging_steps": 1,