---
datasets:
- PJMixers-Dev/Nelathan_synthetic-sugar-quill-cleaner
base_model:
- meta-llama/Llama-3.2-1B
---
# PJMixers-Dev/LLaMa-3.2-Text-Cleaner-v0.1-1B

The model was trained at a maximum sequence length of 16,384 tokens, so potentially 8K tokens of input and 8K tokens of output. It will likely *heavily* reformat text, but should hopefully end up with a cleaner result.

It is probably not a good fit for cleaning text that needs to stay 100% faithful to the original, like educational texts, but it should be fine for cleaning creative writing datasets.
## Prompt format
```
<|begin_of_text|><|unclean_text|>Put your uncleaned text here.<|clean_text|>The model will respond with a cleaned version here.<|end_of_text|>
```

Example using a sample from [PJMixers/RyokoAI_Honeyfeed3600](https://huggingface.co/datasets/PJMixers/RyokoAI_Honeyfeed3600), which the model has not been trained on:
```
<|begin_of_text|><|unclean_text|>MODEL STILL TRAINING<|clean_text|>MODEL STILL TRAINING<|end_of_text|>
```
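
For reference, here is a minimal inference sketch with `transformers` following the prompt format above. The repo id is taken from the title, and the sample text, dtype, and generation settings are placeholder assumptions rather than tested values:

```python
# Minimal sketch, assuming the prompt format above and the repo id from the title.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "PJMixers-Dev/LLaMa-3.2-Text-Cleaner-v0.1-1B"  # assumed repo id
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype="bfloat16", device_map="auto"  # device_map needs accelerate
)

unclean = "some   messy,badly spaced text  that needs cleaning .."  # placeholder input

# BOS + unclean marker + input + clean marker, so the model continues with the cleaned text.
prompt = f"<|begin_of_text|><|unclean_text|>{unclean}<|clean_text|>"
inputs = tokenizer(prompt, add_special_tokens=False, return_tensors="pt").to(model.device)

output = model.generate(
    **inputs,
    max_new_tokens=8192,  # up to ~8K tokens of cleaned output
    eos_token_id=tokenizer.convert_tokens_to_ids("<|end_of_text|>"),
)
cleaned = tokenizer.decode(output[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
print(cleaned)
```

Keeping the input under roughly 8K tokens leaves room for a similarly sized cleaned output within the 16,384-token training length.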
## Axolotl Config

```yaml
# Requirements before running
# - Get latest commit of axolotl (currently c0a0c75)
# - Download these to axolotl/src/axolotl/prompt_strategies
#   - https://github.com/xzuyn/axolotl/blob/came-plus-formatters/src/axolotl/prompt_strategies/text-cleaner.py
# - pip install ftfy
# - pip install git+https://github.com/xzuyn/CAME.git@sr-grams-cautious-8bit

# Weights and Biases logging config
wandb_project: LLaMa-3.2-1B
wandb_entity:
wandb_watch:
wandb_name: LLaMa-3.2-Text-Cleaner-v0.1-1B-FFT-run2
wandb_log_model:

# Model checkpointing config
output_dir: ./Outputs/LLaMa-3.2-Text-Cleaner-v0.1-1B-FFT-run2
save_steps: 10
save_safetensors: true
save_total_limit: 2
save_only_model: true

# Model architecture config
base_model: meta-llama/Llama-3.2-1B
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer

# Mixed precision training config
bf16: true
fp16: false
tf32: false

# Model loading config
load_in_8bit: false
load_in_4bit: false
strict: false

# Sequence config
sequence_len: 16384
min_sample_len: 256
sample_packing: false
eval_sample_packing: false
pad_to_sequence_len: false
train_on_inputs: false
group_by_length: false

# Dataset config
datasets:
  - path: PJMixers-Dev/Nelathan_synthetic-sugar-quill-cleaner
    type: text-cleaner
val_set_size: 128
eval_strategy: steps
eval_steps: 10
dataset_prepared_path: ./00-Tokenized-Datasets/LLaMa-3.2-Text-Cleaner-v0.1-1B-seed42
shuffle_merged_datasets: true
dataset_exact_deduplication: true

# Training hyperparameters
num_epochs: 1
gradient_accumulation_steps: 1
micro_batch_size: 8
eval_batch_size: 8
warmup_steps: 10
optimizer: came_pytorch
optim_args:
  enable_stochastic_rounding: true
  enable_cautious: true
  enable_8bit: true
  enable_gc: true
lr_scheduler: rex
learning_rate: 1e-6
cosine_min_lr_ratio: 0.05
weight_decay: 0.01
max_grad_norm: 0.5
logging_steps: 1

# Model optimization
embeddings_skip_upcast: true
gradient_checkpointing: offload
sdp_attention: true
plugins:
  - axolotl.integrations.liger.LigerPlugin
  - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
cut_cross_entropy: true
liger_rope: true
liger_rms_norm: true
liger_layer_norm: true
liger_glu_activation: true
liger_cross_entropy: false
liger_fused_linear_cross_entropy: false

# Debug config
debug: true
seed: 42

# Token config
added_tokens_overrides:
  128011: "<|unclean_text|>"
  128012: "<|clean_text|>"
special_tokens:
  bos_token: "<|begin_of_text|>"
  eos_token: "<|end_of_text|>"
  pad_token: "<|finetune_right_pad_id|>"
tokens:
```
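
Since `added_tokens_overrides` repurposes two of Llama 3.2's reserved special token slots for the cleaner markers, it can be worth checking that the published tokenizer actually maps them to those IDs. A minimal sketch, again assuming the repo id from the title:

```python
# Sketch: confirm the cleaner markers occupy the overridden token IDs
# (128011 and 128012 per the config above). Repo id assumed from the title.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("PJMixers-Dev/LLaMa-3.2-Text-Cleaner-v0.1-1B")

for token, expected_id in [("<|unclean_text|>", 128011), ("<|clean_text|>", 128012)]:
    actual_id = tokenizer.convert_tokens_to_ids(token)
    print(f"{token}: id={actual_id} (expected {expected_id})")

# The markers should also survive encoding as single tokens rather than being
# split into pieces: the first and last ids here should be 128011 and 128012.
ids = tokenizer.encode("<|unclean_text|>test<|clean_text|>", add_special_tokens=False)
print(ids[0], ids[-1])
```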