PJMixers-Dev
/

LLaMa-3.2-Text-Cleaner-v0.1-1B

Model card Files and versions

xzuyn committed on Jun 15

Commit

ddaf9ec

·

verified ·

1 Parent(s): b92f5e6

Update README.md

Files changed (1) hide show

README.md +7 -4

README.md CHANGED Viewed

@@ -34,11 +34,11 @@ Example using a sample from [PJMixers/RyokoAI_Honeyfeed3600](https://huggingface
 wandb_project: LLaMa-3.2-1B
 wandb_entity:
 wandb_watch:
-wandb_name: LLaMa-3.2-Text-Cleaner-v0.1-1B-FFT-run2
 wandb_log_model:
 # Model checkpointing config
-output_dir: ./Outputs/LLaMa-3.2-Text-Cleaner-v0.1-1B-FFT-run2
 save_steps: 10
 save_safetensors: true
 save_total_limit: 2
@@ -48,6 +48,7 @@ save_only_model: true
 base_model: meta-llama/Llama-3.2-1B
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 # Mixed precision training config
 bf16: true
@@ -84,13 +85,12 @@ num_epochs: 1
 gradient_accumulation_steps: 1
 micro_batch_size: 8
 eval_batch_size: 8
-warmup_steps: 10
 optimizer: came_pytorch
 optim_args:
   enable_stochastic_rounding: true
   enable_cautious: true
   enable_8bit: true
-  enable_gc: true
 lr_scheduler: rex
 learning_rate: 1e-6
 cosine_min_lr_ratio: 0.05
@@ -113,6 +113,9 @@ liger_glu_activation: true
 liger_cross_entropy: false
 liger_fused_linear_cross_entropy: false
 # Debug config
 debug: true
 seed: 42

 wandb_project: LLaMa-3.2-1B
 wandb_entity:
 wandb_watch:
+wandb_name: LLaMa-3.2-Text-Cleaner-v0.1-1B-FFT-run4
 wandb_log_model:
 # Model checkpointing config
+output_dir: ./Outputs/LLaMa-3.2-Text-Cleaner-v0.1-1B-FFT-run4
 save_steps: 10
 save_safetensors: true
 save_total_limit: 2
 base_model: meta-llama/Llama-3.2-1B
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
+chat_template_jinja: "{{- bos_token }}{% for message in messages %}{% if message['role'] == 'system' %}{{ raise_exception('Model does not support system turns.') }}{% elif message['role'] == 'user' %}{{ '<|unclean_text|>' + message['content'] | trim }}{% elif message['role'] == 'assistant' %}{{ '<|clean_text|>' + message['content'] | trim + eos_token }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|clean_text|>' }}{% endif %}"
 # Mixed precision training config
 bf16: true
 gradient_accumulation_steps: 1
 micro_batch_size: 8
 eval_batch_size: 8
+warmup_steps: 0
 optimizer: came_pytorch
 optim_args:
   enable_stochastic_rounding: true
   enable_cautious: true
   enable_8bit: true
 lr_scheduler: rex
 learning_rate: 1e-6
 cosine_min_lr_ratio: 0.05
 liger_cross_entropy: false
 liger_fused_linear_cross_entropy: false
+# Garbage Collection
+gc_steps: 1
 # Debug config
 debug: true
 seed: 42