Safetensors
llama
xzuyn committed on
Commit
ddaf9ec
·
verified ·
1 Parent(s): b92f5e6

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +7 -4
README.md CHANGED
@@ -34,11 +34,11 @@ Example using a sample from [PJMixers/RyokoAI_Honeyfeed3600](https://huggingface
34
  wandb_project: LLaMa-3.2-1B
35
  wandb_entity:
36
  wandb_watch:
37
- wandb_name: LLaMa-3.2-Text-Cleaner-v0.1-1B-FFT-run2
38
  wandb_log_model:
39
 
40
  # Model checkpointing config
41
- output_dir: ./Outputs/LLaMa-3.2-Text-Cleaner-v0.1-1B-FFT-run2
42
  save_steps: 10
43
  save_safetensors: true
44
  save_total_limit: 2
@@ -48,6 +48,7 @@ save_only_model: true
48
  base_model: meta-llama/Llama-3.2-1B
49
  model_type: AutoModelForCausalLM
50
  tokenizer_type: AutoTokenizer
 
51
 
52
  # Mixed precision training config
53
  bf16: true
@@ -84,13 +85,12 @@ num_epochs: 1
84
  gradient_accumulation_steps: 1
85
  micro_batch_size: 8
86
  eval_batch_size: 8
87
- warmup_steps: 10
88
  optimizer: came_pytorch
89
  optim_args:
90
  enable_stochastic_rounding: true
91
  enable_cautious: true
92
  enable_8bit: true
93
- enable_gc: true
94
  lr_scheduler: rex
95
  learning_rate: 1e-6
96
  cosine_min_lr_ratio: 0.05
@@ -113,6 +113,9 @@ liger_glu_activation: true
113
  liger_cross_entropy: false
114
  liger_fused_linear_cross_entropy: false
115
 
 
 
 
116
  # Debug config
117
  debug: true
118
  seed: 42
 
34
  wandb_project: LLaMa-3.2-1B
35
  wandb_entity:
36
  wandb_watch:
37
+ wandb_name: LLaMa-3.2-Text-Cleaner-v0.1-1B-FFT-run4
38
  wandb_log_model:
39
 
40
  # Model checkpointing config
41
+ output_dir: ./Outputs/LLaMa-3.2-Text-Cleaner-v0.1-1B-FFT-run4
42
  save_steps: 10
43
  save_safetensors: true
44
  save_total_limit: 2
 
48
  base_model: meta-llama/Llama-3.2-1B
49
  model_type: AutoModelForCausalLM
50
  tokenizer_type: AutoTokenizer
51
+ chat_template_jinja: "{{- bos_token }}{% for message in messages %}{% if message['role'] == 'system' %}{{ raise_exception('Model does not support system turns.') }}{% elif message['role'] == 'user' %}{{ '<|unclean_text|>' + message['content'] | trim }}{% elif message['role'] == 'assistant' %}{{ '<|clean_text|>' + message['content'] | trim + eos_token }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|clean_text|>' }}{% endif %}"
52
 
53
  # Mixed precision training config
54
  bf16: true
 
85
  gradient_accumulation_steps: 1
86
  micro_batch_size: 8
87
  eval_batch_size: 8
88
+ warmup_steps: 0
89
  optimizer: came_pytorch
90
  optim_args:
91
  enable_stochastic_rounding: true
92
  enable_cautious: true
93
  enable_8bit: true
 
94
  lr_scheduler: rex
95
  learning_rate: 1e-6
96
  cosine_min_lr_ratio: 0.05
 
113
  liger_cross_entropy: false
114
  liger_fused_linear_cross_entropy: false
115
 
116
+ # Garbage Collection
117
+ gc_steps: 1
118
+
119
  # Debug config
120
  debug: true
121
  seed: 42