!!python/tuple
- !!python/object:__main__.ModelArguments
  bnb_4bit_quant_type: nf4
  cache_dir: ./cache
  device_map: auto
  load_in_4bit: true
  load_in_8bit: false
  model_name_or_path: HuggingFaceH4/zephyr-7b-beta
  model_revision: main
  model_type: auto
  neft_alpha: 0
  rope_scaling: null
  shift_attn: false
  tokenizer_name_or_path: null
  torch_dtype: float16
  trust_remote_code: true
  use_bnb_nested_quant: false
  use_fast_tokenizer: false
  use_flash_attention_2: false
- !!python/object:__main__.DataArguments
  dataset_config_name: null
  dataset_name: null
  ignore_pad_token_for_loss: true
  max_eval_samples: null
  max_train_samples: null
  overwrite_cache: false
  preprocessing_num_workers: 4
  template_name: vicuna
  train_file_dir: datasets/finetune
  validation_file_dir: null
  validation_split_percentage: 10
- !!python/object:__main__.SFTConfig
  __cached__setup_devices: !!python/object/apply:torch.device
  - cuda
  - 0
  _n_gpu: 1
  adafactor: false
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_epsilon: 1.0e-08
  auto_find_batch_size: false
  bf16: false
  bf16_full_eval: false
  data_seed: null
  dataloader_drop_last: false
  dataloader_num_workers: 0
  dataloader_pin_memory: true
  ddp_backend: null
  ddp_broadcast_buffers: null
  ddp_bucket_cap_mb: null
  ddp_find_unused_parameters: false
  ddp_timeout: 30000
  debug: []
  deepspeed: null
  deepspeed_plugin: null
  disable_tqdm: false
  dispatch_batches: null
  distributed_state: !!python/object:accelerate.state.PartialState
    _cpu: false
    backend: null
    debug: false
    device: !!python/object/apply:torch.device
    - cuda
    - 0
    distributed_type: !!python/object/apply:accelerate.utils.dataclasses.DistributedType
    - MULTI_GPU
    fork_launched: false
    local_process_index: 0
    num_processes: 1
    process_index: 0
  do_eval: true
  do_predict: false
  do_train: true
  eval_accumulation_steps: null
  eval_delay: 0
  eval_steps: 25
  evaluation_strategy: !!python/object/apply:transformers.trainer_utils.IntervalStrategy
  - steps
  fp16: false
  fp16_backend: auto
  fp16_full_eval: false
  fp16_opt_level: O1
  fsdp: []
  fsdp_config:
    min_num_params: 0
    xla: false
    xla_fsdp_grad_ckpt: false
  fsdp_min_num_params: 0
  fsdp_transformer_layer_cls_to_wrap: null
  full_determinism: false
  gradient_accumulation_steps: 1
  gradient_checkpointing: true
  gradient_checkpointing_kwargs:
    use_reentrant: false
  greater_is_better: null
  group_by_length: false
  half_precision_backend: auto
  hub_always_push: false
  hub_model_id: hllj/non-qa-sft-zephyr-7b-beta-v1
  hub_private_repo: false
  hub_strategy: !!python/object/apply:transformers.trainer_utils.HubStrategy
  - every_save
  hub_token: null
  ignore_data_skip: false
  include_inputs_for_metrics: false
  include_tokens_per_second: false
  jit_mode_eval: false
  label_names: null
  label_smoothing_factor: 0.0
  learning_rate: 3.0e-05
  length_column_name: length
  load_best_model_at_end: false
  local_rank: 0
  log_level: info
  log_level_replica: warning
  log_on_each_node: true
  logging_dir: outputs-sft-zephyr-beta-v1/runs/Nov22_05-52-29_a72e59c0abac
  logging_first_step: true
  logging_nan_inf_filter: true
  logging_steps: 10
  logging_strategy: !!python/object/apply:transformers.trainer_utils.IntervalStrategy
  - steps
  lr_scheduler_type: !!python/object/apply:transformers.trainer_utils.SchedulerType
  - cosine
  max_grad_norm: 1.0
  max_seq_length: 512
  max_steps: 50
  metric_for_best_model: null
  mp_parameters: ''
  neftune_noise_alpha: null
  no_cuda: false
  num_train_epochs: 3.0
  optim: !!python/object/apply:transformers.training_args.OptimizerNames
  - adamw_torch
  optim_args: null
  output_dir: outputs-sft-zephyr-beta-v1
  overwrite_output_dir: true
  past_index: -1
  per_device_eval_batch_size: 4
  per_device_train_batch_size: 4
  per_gpu_eval_batch_size: null
  per_gpu_train_batch_size: null
  prediction_loss_only: false
  push_to_hub: true
  push_to_hub_model_id: null
  push_to_hub_organization: null
  push_to_hub_token: null
  ray_scope: last
  remove_unused_columns: true
  report_to:
  - wandb
  resume_from_checkpoint: null
  run_name: sft-zephyr-7b-beta-v1
  save_on_each_node: false
  save_safetensors: true
  save_steps: 25
  save_strategy: !!python/object/apply:transformers.trainer_utils.IntervalStrategy
  - steps
  save_total_limit: 13
  seed: 42
  skip_memory_metrics: true
  split_batches: false
  tf32: null
  torch_compile: false
  torch_compile_backend: null
  torch_compile_mode: null
  torchdynamo: null
  tpu_metrics_debug: false
  tpu_num_cores: null
  use_cpu: false
  use_ipex: false
  use_legacy_prediction_loop: false
  use_mps_device: false
  warmup_ratio: 0.05
  warmup_steps: 0
  weight_decay: 0.05
- !!python/object:__main__.ScriptArguments
  lora_alpha: 16
  lora_dropout: 0.1
  lora_modules_to_save: null
  lora_r: 64
  lora_target_modules:
  - q_proj
  - k_proj
  - v_proj
  - o_proj
  peft_path: null
  use_peft: true
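
The dump above is a PyYAML serialization of the parsed argument tuple (`ModelArguments`, `DataArguments`, `SFTConfig`, `ScriptArguments`), so it carries `!!python/object` tags that `yaml.safe_load` refuses to construct. A minimal sketch for reading it back as plain dicts without importing the original training script, assuming only PyYAML; the loader name `PermissiveLoader` and the filename `train_args.yaml` are illustrative:

```python
import yaml

class PermissiveLoader(yaml.SafeLoader):
    """SafeLoader that degrades any python/* tag to a plain dict, list, or scalar."""

def _construct_untagged(loader, tag_suffix, node):
    # Drop python/object, python/tuple, python/object/apply, ... tags
    # and keep only the underlying YAML structure.
    if isinstance(node, yaml.MappingNode):
        return loader.construct_mapping(node, deep=True)
    if isinstance(node, yaml.SequenceNode):
        return loader.construct_sequence(node, deep=True)
    return loader.construct_scalar(node)

PermissiveLoader.add_multi_constructor("tag:yaml.org,2002:python/", _construct_untagged)

with open("train_args.yaml") as f:  # hypothetical filename for this dump
    model_args, data_args, training_args, script_args = yaml.load(f, Loader=PermissiveLoader)

print(model_args["model_name_or_path"])  # HuggingFaceH4/zephyr-7b-beta
```

The model- and adapter-related fields map onto standard transformers / peft objects. The original training script is not shown here, so the reconstruction below is an assumption based on those public APIs rather than a quote of the actual code:

```python
import torch
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# From ModelArguments: 4-bit NF4 quantization, fp16 compute, no nested quant.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # load_in_4bit: true
    bnb_4bit_quant_type="nf4",             # bnb_4bit_quant_type: nf4
    bnb_4bit_compute_dtype=torch.float16,  # torch_dtype: float16
    bnb_4bit_use_double_quant=False,       # use_bnb_nested_quant: false
)

model = AutoModelForCausalLM.from_pretrained(
    "HuggingFaceH4/zephyr-7b-beta",
    revision="main",                       # model_revision: main
    quantization_config=bnb_config,
    device_map="auto",                     # device_map: auto
    torch_dtype=torch.float16,
    trust_remote_code=True,                # trust_remote_code: true
)
tokenizer = AutoTokenizer.from_pretrained(
    "HuggingFaceH4/zephyr-7b-beta",
    use_fast=False,                        # use_fast_tokenizer: false
)

# From ScriptArguments: LoRA adapters on the attention projections.
lora_config = LoraConfig(
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
```

Note that `SFTConfig` here is the script's own `__main__` class (apparently a `transformers.TrainingArguments` subclass extended with `max_seq_length`), not `trl.SFTConfig`.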