Update README.md
Browse files
README.md
CHANGED
@@ -34,11 +34,11 @@ Example using a sample from [PJMixers/RyokoAI_Honeyfeed3600](https://huggingface
|
|
34 |
wandb_project: LLaMa-3.2-1B
|
35 |
wandb_entity:
|
36 |
wandb_watch:
|
37 |
-
wandb_name: LLaMa-3.2-Text-Cleaner-v0.1-1B-FFT-
|
38 |
wandb_log_model:
|
39 |
|
40 |
# Model checkpointing config
|
41 |
-
output_dir: ./Outputs/LLaMa-3.2-Text-Cleaner-v0.1-1B-FFT-
|
42 |
save_steps: 10
|
43 |
save_safetensors: true
|
44 |
save_total_limit: 2
|
@@ -48,6 +48,7 @@ save_only_model: true
|
|
48 |
base_model: meta-llama/Llama-3.2-1B
|
49 |
model_type: AutoModelForCausalLM
|
50 |
tokenizer_type: AutoTokenizer
|
|
|
51 |
|
52 |
# Mixed precision training config
|
53 |
bf16: true
|
@@ -84,13 +85,12 @@ num_epochs: 1
|
|
84 |
gradient_accumulation_steps: 1
|
85 |
micro_batch_size: 8
|
86 |
eval_batch_size: 8
|
87 |
-
warmup_steps:
|
88 |
optimizer: came_pytorch
|
89 |
optim_args:
|
90 |
enable_stochastic_rounding: true
|
91 |
enable_cautious: true
|
92 |
enable_8bit: true
|
93 |
-
enable_gc: true
|
94 |
lr_scheduler: rex
|
95 |
learning_rate: 1e-6
|
96 |
cosine_min_lr_ratio: 0.05
|
@@ -113,6 +113,9 @@ liger_glu_activation: true
|
|
113 |
liger_cross_entropy: false
|
114 |
liger_fused_linear_cross_entropy: false
|
115 |
|
|
|
|
|
|
|
116 |
# Debug config
|
117 |
debug: true
|
118 |
seed: 42
|
|
|
34 |
wandb_project: LLaMa-3.2-1B
|
35 |
wandb_entity:
|
36 |
wandb_watch:
|
37 |
+
wandb_name: LLaMa-3.2-Text-Cleaner-v0.1-1B-FFT-run4
|
38 |
wandb_log_model:
|
39 |
|
40 |
# Model checkpointing config
|
41 |
+
output_dir: ./Outputs/LLaMa-3.2-Text-Cleaner-v0.1-1B-FFT-run4
|
42 |
save_steps: 10
|
43 |
save_safetensors: true
|
44 |
save_total_limit: 2
|
|
|
48 |
base_model: meta-llama/Llama-3.2-1B
|
49 |
model_type: AutoModelForCausalLM
|
50 |
tokenizer_type: AutoTokenizer
|
51 |
+
chat_template_jinja: "{{- bos_token }}{% for message in messages %}{% if message['role'] == 'system' %}{{ raise_exception('Model does not support system turns.') }}{% elif message['role'] == 'user' %}{{ '<|unclean_text|>' + message['content'] | trim }}{% elif message['role'] == 'assistant' %}{{ '<|clean_text|>' + message['content'] | trim + eos_token }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|clean_text|>' }}{% endif %}"
|
52 |
|
53 |
# Mixed precision training config
|
54 |
bf16: true
|
|
|
85 |
gradient_accumulation_steps: 1
|
86 |
micro_batch_size: 8
|
87 |
eval_batch_size: 8
|
88 |
+
warmup_steps: 0
|
89 |
optimizer: came_pytorch
|
90 |
optim_args:
|
91 |
enable_stochastic_rounding: true
|
92 |
enable_cautious: true
|
93 |
enable_8bit: true
|
|
|
94 |
lr_scheduler: rex
|
95 |
learning_rate: 1e-6
|
96 |
cosine_min_lr_ratio: 0.05
|
|
|
113 |
liger_cross_entropy: false
|
114 |
liger_fused_linear_cross_entropy: false
|
115 |
|
116 |
+
# Garbage Collection
|
117 |
+
gc_steps: 1
|
118 |
+
|
119 |
# Debug config
|
120 |
debug: true
|
121 |
seed: 42
|