uiuc-convai
/

CoALM-405B

Text Generation

Model card Files Files and versions

Nessii013 commited on Feb 10

Commit

0f830f9

·

verified ·

1 Parent(s): 59fd3eb

Create oumi/oumi_train.yaml

Files changed (1) hide show

oumi/oumi_train.yaml +92 -0

oumi/oumi_train.yaml ADDED Viewed

	@@ -0,0 +1,92 @@

+# Lora config for CALM 405B.
+model:
+  model_name: "meta-llama/Llama-3.1-405B-Instruct"
+  model_max_length: 4096
+  torch_dtype_str: "bfloat16"
+  attn_implementation: "sdpa"
+  load_pretrained_weights: True
+  trust_remote_code: True
+  tokenizer_pad_token: "<|finetune_right_pad_id|>"
+  enable_liger_kernel: True
+data:
+  train:
+    datasets:
+      - dataset_name: "text_sft_jsonl"
+        dataset_path: "/path/to/training/dataset.jsonl"
+        shuffle: True
+        seed: 42
+    collator_name: "text_completions_only_with_padding"
+    target_col: "messages"
+    use_async_dataset: True
+    seed: 42
+  validation:
+    datasets:
+      - dataset_name: "text_sft_jsonl"
+        dataset_path: "/path/to/validation/dataset.jsonl"
+    collator_name: "text_completions_only_with_padding"
+    target_col: "messages"
+    use_async_dataset: True
+    seed: 42
+training:
+  trainer_type: "TRL_SFT"
+  use_peft: True
+  save_steps: 500
+  num_train_epochs: 1
+  per_device_train_batch_size: 2
+  gradient_accumulation_steps: 1
+  eval_strategy: "steps"
+  eval_steps: 500
+  per_device_eval_batch_size: 1
+  enable_gradient_checkpointing: True
+  gradient_checkpointing_kwargs:
+    use_reentrant: False
+  ddp_find_unused_parameters: False
+  optimizer: "adamw_torch_fused"
+  learning_rate: 1.0e-04
+  warmup_ratio: 0.1
+  weight_decay: 0.01
+  max_grad_norm: 10
+  compile: False
+  dataloader_num_workers: "auto"
+  dataloader_prefetch_factor: 32
+  logging_steps: 50
+  log_model_summary: False
+  empty_device_cache_steps: 1
+  include_performance_metrics: True
+  output_dir: "output/llama405b.qlora"
+  enable_wandb: True
+peft:
+  q_lora: True
+  # https://github.com/pytorch/torchtune/blob/37337f71677da69f0967a9cde34b96ad7fec3cb6/torchtune/modules/peft/lora.py#L95
+  bnb_4bit_quant_type: "nf4"
+  # Must use a float type for quantized data storage. See:
+  # https://huggingface.co/docs/bitsandbytes/main/en/fsdp_qlora#quantized-data-storage.
+  bnb_4bit_quant_storage: "bfloat16"
+  bnb_4bit_compute_dtype: "bfloat16"
+  lora_r: 16
+  lora_alpha: 32
+  lora_dropout: 0.0
+  lora_target_modules:
+    - q_proj
+    - k_proj
+    - v_proj
+    - o_proj
+    - gate_proj
+    - up_proj
+    - down_proj
+fsdp:
+  enable_fsdp: True
+  forward_prefetch: True
+  backward_prefetch: "BACKWARD_POST"
+  use_orig_params: True
+  cpu_offload: True
+  auto_wrap_policy: "TRANSFORMER_BASED_WRAP"
+  transformer_layer_cls: "LlamaDecoderLayer"