mwitiderrick committed
Commit ff6f13f · verified · 1 Parent(s): a51a256

Rename recipe.yaml to sparse.yaml

Files changed (2)
  1. recipe.yaml +0 -51
  2. sparse.yaml +10 -0
recipe.yaml DELETED
@@ -1,51 +0,0 @@
- test_stage:
-   obcq_modifiers:
-     LogarithmicEqualizationModifier:
-       mappings: [
-         [["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"], "re:.*input_layernorm"],
-         [["re:.*gate_proj", "re:.*up_proj"], "re:.*post_attention_layernorm"],
-       ]
-     QuantizationModifier:
-       ignore:
-         # These operations don't make sense to quantize
-         - LlamaRotaryEmbedding
-         - LlamaRMSNorm
-         - SiLUActivation
-         - MatMulOutput_QK
-         - MatMulOutput_PV
-         # Skip quantizing the layers with the most sensitive activations
-         - model.layers.21.mlp.down_proj
-         - model.layers.7.mlp.down_proj
-         - model.layers.2.mlp.down_proj
-         - model.layers.8.self_attn.q_proj
-         - model.layers.8.self_attn.k_proj
-       post_oneshot_calibration: true
-       scheme_overrides:
-         # Enable channelwise quantization for better accuracy
-         Linear:
-           weights:
-             num_bits: 8
-             symmetric: true
-             strategy: channel
-         MatMulLeftInput_QK:
-           input_activations:
-             num_bits: 8
-             symmetric: true
-         MatMulLeftInput_PV:
-           input_activations:
-             num_bits: 8
-             symmetric: true
-         # For the embeddings, only weight-quantization makes sense
-         Embedding:
-           input_activations: null
-           weights:
-             num_bits: 8
-             symmetric: false
-     SparseGPTModifier:
-       sparsity: 0.5
-       block_size: 128
-       sequential_update: true
-       quantize: true
-       percdamp: 0.01
-       mask_structure: "0:0"
-       targets: ["re:model.layers.\\d*$"]
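The deleted recipe configured three OBCQ modifiers: activation equalization (LogarithmicEqualizationModifier), INT8 quantization with channelwise weights (QuantizationModifier), and 50% unstructured SparseGPT pruning with quantize: true. As a quick way to see which modifiers any such recipe configures before running it, here is a minimal sketch (not part of this commit) using PyYAML; the file name is whichever recipe you point it at.

# Minimal sketch: list the OBCQ modifiers a SparseML recipe configures.
# Assumes only PyYAML; the recipe path is whichever file you inspect.
import yaml

with open("recipe.yaml") as f:  # or "sparse.yaml"
    recipe = yaml.safe_load(f)

# These recipes nest modifiers under <name>_stage -> obcq_modifiers.
for stage_name, stage in recipe.items():
    for modifier, config in stage.get("obcq_modifiers", {}).items():
        print(f"{stage_name}: {modifier} ({len(config)} settings)")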
sparse.yaml ADDED
@@ -0,0 +1,10 @@
+ test_stage:
+   obcq_modifiers:
+     SparseGPTModifier:
+       sparsity: 0.5
+       block_size: 128
+       sequential_update: true
+       quantize: false
+       percdamp: 0.01
+       mask_structure: "0:0"
+       targets: ["re:model.layers.\\d*$"]
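The replacement sparse.yaml keeps only the SparseGPTModifier, with quantize switched to false, so the recipe now applies 50% unstructured pruning without any quantization step. Below is a minimal usage sketch (not from this repo) of applying such a recipe with SparseML's one-shot entrypoint; the model name, dataset, and sample count are placeholders, and exact import paths and arguments can vary across SparseML versions.

# Minimal usage sketch: apply sparse.yaml in one shot.
# Model, dataset, and sample count are placeholders; the oneshot API is
# assumed from SparseML's OBCQ examples and may differ between versions.
from sparseml.transformers import oneshot

oneshot(
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # placeholder base model
    dataset="open_platypus",                     # placeholder calibration data
    recipe="sparse.yaml",
    output_dir="./one_shot_output",
    num_calibration_samples=512,                 # placeholder sample count
)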