Update README.md
Browse files
README.md
CHANGED
|
@@ -34,11 +34,11 @@ Example using a sample from [PJMixers/RyokoAI_Honeyfeed3600](https://huggingface
|
|
| 34 |
wandb_project: LLaMa-3.2-1B
|
| 35 |
wandb_entity:
|
| 36 |
wandb_watch:
|
| 37 |
-
wandb_name: LLaMa-3.2-Text-Cleaner-v0.1-1B-FFT-
|
| 38 |
wandb_log_model:
|
| 39 |
|
| 40 |
# Model checkpointing config
|
| 41 |
-
output_dir: ./Outputs/LLaMa-3.2-Text-Cleaner-v0.1-1B-FFT-
|
| 42 |
save_steps: 10
|
| 43 |
save_safetensors: true
|
| 44 |
save_total_limit: 2
|
|
@@ -48,6 +48,7 @@ save_only_model: true
|
|
| 48 |
base_model: meta-llama/Llama-3.2-1B
|
| 49 |
model_type: AutoModelForCausalLM
|
| 50 |
tokenizer_type: AutoTokenizer
|
|
|
|
| 51 |
|
| 52 |
# Mixed precision training config
|
| 53 |
bf16: true
|
|
@@ -84,13 +85,12 @@ num_epochs: 1
|
|
| 84 |
gradient_accumulation_steps: 1
|
| 85 |
micro_batch_size: 8
|
| 86 |
eval_batch_size: 8
|
| 87 |
-
warmup_steps:
|
| 88 |
optimizer: came_pytorch
|
| 89 |
optim_args:
|
| 90 |
enable_stochastic_rounding: true
|
| 91 |
enable_cautious: true
|
| 92 |
enable_8bit: true
|
| 93 |
-
enable_gc: true
|
| 94 |
lr_scheduler: rex
|
| 95 |
learning_rate: 1e-6
|
| 96 |
cosine_min_lr_ratio: 0.05
|
|
@@ -113,6 +113,9 @@ liger_glu_activation: true
|
|
| 113 |
liger_cross_entropy: false
|
| 114 |
liger_fused_linear_cross_entropy: false
|
| 115 |
|
|
|
|
|
|
|
|
|
|
| 116 |
# Debug config
|
| 117 |
debug: true
|
| 118 |
seed: 42
|
|
|
|
| 34 |
wandb_project: LLaMa-3.2-1B
|
| 35 |
wandb_entity:
|
| 36 |
wandb_watch:
|
| 37 |
+
wandb_name: LLaMa-3.2-Text-Cleaner-v0.1-1B-FFT-run4
|
| 38 |
wandb_log_model:
|
| 39 |
|
| 40 |
# Model checkpointing config
|
| 41 |
+
output_dir: ./Outputs/LLaMa-3.2-Text-Cleaner-v0.1-1B-FFT-run4
|
| 42 |
save_steps: 10
|
| 43 |
save_safetensors: true
|
| 44 |
save_total_limit: 2
|
|
|
|
| 48 |
base_model: meta-llama/Llama-3.2-1B
|
| 49 |
model_type: AutoModelForCausalLM
|
| 50 |
tokenizer_type: AutoTokenizer
|
| 51 |
+
chat_template_jinja: "{{- bos_token }}{% for message in messages %}{% if message['role'] == 'system' %}{{ raise_exception('Model does not support system turns.') }}{% elif message['role'] == 'user' %}{{ '<|unclean_text|>' + message['content'] | trim }}{% elif message['role'] == 'assistant' %}{{ '<|clean_text|>' + message['content'] | trim + eos_token }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|clean_text|>' }}{% endif %}"
|
| 52 |
|
| 53 |
# Mixed precision training config
|
| 54 |
bf16: true
|
|
|
|
| 85 |
gradient_accumulation_steps: 1
|
| 86 |
micro_batch_size: 8
|
| 87 |
eval_batch_size: 8
|
| 88 |
+
warmup_steps: 0
|
| 89 |
optimizer: came_pytorch
|
| 90 |
optim_args:
|
| 91 |
enable_stochastic_rounding: true
|
| 92 |
enable_cautious: true
|
| 93 |
enable_8bit: true
|
|
|
|
| 94 |
lr_scheduler: rex
|
| 95 |
learning_rate: 1e-6
|
| 96 |
cosine_min_lr_ratio: 0.05
|
|
|
|
| 113 |
liger_cross_entropy: false
|
| 114 |
liger_fused_linear_cross_entropy: false
|
| 115 |
|
| 116 |
+
# Garbage Collection
|
| 117 |
+
gc_steps: 1
|
| 118 |
+
|
| 119 |
# Debug config
|
| 120 |
debug: true
|
| 121 |
seed: 42
|