File size: 1,168 Bytes
51b3b45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79dd1e6
51b3b45
 
 
 
 
 
 
 
 
 
 
 
 
 
5f41b4d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# Recipe stage `test_stage`: all modifiers are grouped under `obcq_modifiers`.
test_stage:
  obcq_modifiers:
    # Each `mappings` entry is a pair: a list of layer-name patterns, followed
    # by a single layer-name pattern associated with that list.
    # NOTE(review): the `re:` prefix presumably marks the rest of the string as
    # a regular expression -- confirm against the modifier's documentation.
    LogarithmicEqualizationModifier:
      mappings:
        - - ['re:.*q_proj', 're:.*k_proj', 're:.*v_proj']
          - 're:.*input_layernorm'
        - - ['re:.*gate_proj', 're:.*up_proj']
          - 're:.*post_attention_layernorm'
    QuantizationModifier:
      # Names listed under `ignore` are excluded from quantization.
      ignore:
        # Operations for which quantization is not meaningful
        - LlamaRotaryEmbedding
        - LlamaRMSNorm
        - SiLUActivation
        - MatMulOutput_QK
        - MatMulOutput_PV
        # Layers whose activations are the most sensitive; left unquantized
        - model.layers.21.mlp.down_proj
        - model.layers.7.mlp.down_proj
        - model.layers.2.mlp.down_proj
        - model.layers.8.self_attn.q_proj
        - model.layers.8.self_attn.k_proj
      # NOTE(review): presumably turns off a calibration pass after one-shot
      # application -- confirm against the modifier's documentation.
      post_oneshot_calibration: false
      # Per-type quantization settings, keyed by module/operation type name.
      scheme_overrides:
        # Linear: 8-bit symmetric weights with strategy `channel`.
        Linear:
          weights:
            num_bits: 8
            symmetric: true
            strategy: channel
        # MatMulLeftInput_QK: 8-bit symmetric input activations.
        MatMulLeftInput_QK:
          input_activations:
            num_bits: 8
            symmetric: true
        # Embedding: input activations explicitly null; 8-bit weights with
        # `symmetric: false`.
        Embedding:
          input_activations: null
          weights:
            num_bits: 8
            symmetric: false