# non-qa-sft-zephyr-7b-beta-v1 / config_argument.yaml
!!python/tuple
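# Serialized tuple of the four parsed argument dataclasses (model, data,
# trainer, and LoRA script arguments) as dumped by PyYAML. The
# !!python/object tags reconstruct live Python objects, so reloading this
# file needs an unsafe loader -- a minimal sketch, assuming PyYAML >= 5.1:
#   import yaml
#   with open("config_argument.yaml") as f:
#       model_args, data_args, training_args, script_args = \
#           yaml.load(f, Loader=yaml.UnsafeLoader)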
- !!python/object:__main__.ModelArguments
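  # How the base model is loaded: HuggingFaceH4/zephyr-7b-beta in
  # bitsandbytes 4-bit NF4 quantization (the QLoRA recipe) with float16
  # compute; nested (double) quantization and FlashAttention-2 stay off.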
bnb_4bit_quant_type: nf4
cache_dir: ./cache
device_map: auto
load_in_4bit: true
load_in_8bit: false
model_name_or_path: HuggingFaceH4/zephyr-7b-beta
model_revision: main
model_type: auto
neft_alpha: 0
rope_scaling: null
shift_attn: false
tokenizer_name_or_path: null
torch_dtype: float16
trust_remote_code: true
use_bnb_nested_quant: false
use_fast_tokenizer: false
use_flash_attention_2: false
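# Dataset sources and preprocessing: training files come from
# datasets/finetune, and with no validation_file_dir set, 10% of the
# training data is split off for evaluation. Prompts use the "vicuna"
# chat template.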
- !!python/object:__main__.DataArguments
dataset_config_name: null
dataset_name: null
ignore_pad_token_for_loss: true
max_eval_samples: null
max_train_samples: null
overwrite_cache: false
preprocessing_num_workers: 4
template_name: vicuna
train_file_dir: datasets/finetune
validation_file_dir: null
validation_split_percentage: 10
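# The training script's SFTConfig (a transformers TrainingArguments
# subclass, judging by its fields). The dump also captures runtime state
# (__cached__setup_devices, distributed_state), which is why torch.device
# and accelerate objects appear below.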
- !!python/object:__main__.SFTConfig
__cached__setup_devices: !!python/object/apply:torch.device
- cuda
- 0
_n_gpu: 1
adafactor: false
adam_beta1: 0.9
adam_beta2: 0.999
adam_epsilon: 1.0e-08
auto_find_batch_size: false
bf16: false
bf16_full_eval: false
data_seed: null
dataloader_drop_last: false
dataloader_num_workers: 0
dataloader_pin_memory: true
ddp_backend: null
ddp_broadcast_buffers: null
ddp_bucket_cap_mb: null
ddp_find_unused_parameters: false
ddp_timeout: 30000
debug: []
deepspeed: null
deepspeed_plugin: null
disable_tqdm: false
dispatch_batches: null
distributed_state: !!python/object:accelerate.state.PartialState
_cpu: false
backend: null
debug: false
device: !!python/object/apply:torch.device
- cuda
- 0
distributed_type: !!python/object/apply:accelerate.utils.dataclasses.DistributedType
- MULTI_GPU
fork_launched: false
local_process_index: 0
num_processes: 1
process_index: 0
do_eval: true
do_predict: false
do_train: true
eval_accumulation_steps: null
eval_delay: 0
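  # Evaluation runs every 25 steps, matching save_steps further down, so
  # every saved checkpoint has a corresponding eval.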
eval_steps: 25
evaluation_strategy: !!python/object/apply:transformers.trainer_utils.IntervalStrategy
- steps
fp16: false
fp16_backend: auto
fp16_full_eval: false
fp16_opt_level: O1
fsdp: []
fsdp_config:
min_num_params: 0
xla: false
xla_fsdp_grad_ckpt: false
fsdp_min_num_params: 0
fsdp_transformer_layer_cls_to_wrap: null
full_determinism: false
gradient_accumulation_steps: 1
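  # Non-reentrant activation checkpointing (use_reentrant: false), the
  # variant recommended for recent PyTorch releases; trades recompute for
  # lower activation memory.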
gradient_checkpointing: true
gradient_checkpointing_kwargs:
use_reentrant: false
greater_is_better: null
group_by_length: false
half_precision_backend: auto
hub_always_push: false
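  # With push_to_hub enabled below, hub_strategy every_save pushes each
  # checkpoint to hllj/non-qa-sft-zephyr-7b-beta-v1 as it is written.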
hub_model_id: hllj/non-qa-sft-zephyr-7b-beta-v1
hub_private_repo: false
hub_strategy: !!python/object/apply:transformers.trainer_utils.HubStrategy
- every_save
hub_token: null
ignore_data_skip: false
include_inputs_for_metrics: false
include_tokens_per_second: false
jit_mode_eval: false
label_names: null
label_smoothing_factor: 0.0
learning_rate: 3.0e-05
length_column_name: length
load_best_model_at_end: false
local_rank: 0
log_level: info
log_level_replica: warning
log_on_each_node: true
logging_dir: outputs-sft-zephyr-beta-v1/runs/Nov22_05-52-29_a72e59c0abac
logging_first_step: true
logging_nan_inf_filter: true
logging_steps: 10
logging_strategy: !!python/object/apply:transformers.trainer_utils.IntervalStrategy
- steps
lr_scheduler_type: !!python/object/apply:transformers.trainer_utils.SchedulerType
- cosine
max_grad_norm: 1.0
max_seq_length: 512
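  # In the Trainer, max_steps > 0 overrides num_train_epochs, so this run
  # stops after 50 optimizer steps despite the 3.0 epochs listed below.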
max_steps: 50
metric_for_best_model: null
mp_parameters: ''
neftune_noise_alpha: null
no_cuda: false
num_train_epochs: 3.0
optim: !!python/object/apply:transformers.training_args.OptimizerNames
- adamw_torch
optim_args: null
output_dir: outputs-sft-zephyr-beta-v1
overwrite_output_dir: true
past_index: -1
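  # Effective train batch = per_device_train_batch_size (4) x
  # gradient_accumulation_steps (1) x num_processes (1) = 4 sequences per
  # optimizer step.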
per_device_eval_batch_size: 4
per_device_train_batch_size: 4
per_gpu_eval_batch_size: null
per_gpu_train_batch_size: null
prediction_loss_only: false
push_to_hub: true
push_to_hub_model_id: null
push_to_hub_organization: null
push_to_hub_token: null
ray_scope: last
remove_unused_columns: true
report_to:
- wandb
resume_from_checkpoint: null
run_name: sft-zephyr-7b-beta-v1
save_on_each_node: false
save_safetensors: true
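  # At 50 max_steps, checkpoints land at steps 25 and 50 only, so the
  # save_total_limit of 13 never comes into play for this run.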
save_steps: 25
save_strategy: !!python/object/apply:transformers.trainer_utils.IntervalStrategy
- steps
save_total_limit: 13
seed: 42
skip_memory_metrics: true
split_batches: false
tf32: null
torch_compile: false
torch_compile_backend: null
torch_compile_mode: null
torchdynamo: null
tpu_metrics_debug: false
tpu_num_cores: null
use_cpu: false
use_ipex: false
use_legacy_prediction_loop: false
use_mps_device: false
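  # warmup_steps is 0, so warmup is derived from warmup_ratio instead:
  # ceil(0.05 * 50 max_steps) = 3 warmup steps
  # (TrainingArguments.get_warmup_steps).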
warmup_ratio: 0.05
warmup_steps: 0
weight_decay: 0.05
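# PEFT/LoRA settings: rank-64 adapters with alpha 16 (scaling alpha/r =
# 16/64 = 0.25) attached to the attention projections (q/k/v/o_proj) of
# the Mistral-based Zephyr model; only the adapter weights are trained.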
- !!python/object:__main__.ScriptArguments
lora_alpha: 16
lora_dropout: 0.1
lora_modules_to_save: null
lora_r: 64
lora_target_modules:
- q_proj
- k_proj
- v_proj
- o_proj
peft_path: null
use_peft: true