nm-testing
/

TinyLlama-1.1B-Chat-v1.0-open_platypus-pruned50-quant-ds

Model card · Files and versions

mwitiderrick committed on Jan 23, 2024

Commit

ff6f13f

·

verified ·

1 Parent(s): a51a256

Rename recipe.yaml to sparse.yaml

Files changed (2) hide show

recipe.yaml +0 -51
sparse.yaml +10 -0

recipe.yaml DELETED Viewed

@@ -1,51 +0,0 @@
-test_stage:
-  obcq_modifiers:
-    LogarithmicEqualizationModifier:
-      mappings: [
-        [["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"], "re:.*input_layernorm"],
-        [["re:.*gate_proj", "re:.*up_proj"], "re:.*post_attention_layernorm"],
-      ]
-    QuantizationModifier:
-      ignore:
-        # These operations don't make sense to quantize
-        - LlamaRotaryEmbedding
-        - LlamaRMSNorm
-        - SiLUActivation
-        - MatMulOutput_QK
-        - MatMulOutput_PV
-        # Skip quantizing the layers with the most sensitive activations
-        - model.layers.21.mlp.down_proj
-        - model.layers.7.mlp.down_proj
-        - model.layers.2.mlp.down_proj
-        - model.layers.8.self_attn.q_proj
-        - model.layers.8.self_attn.k_proj
-      post_oneshot_calibration: true
-      scheme_overrides:
-        # Enable channelwise quantization for better accuracy
-        Linear:
-          weights:
-            num_bits: 8
-            symmetric: true
-            strategy: channel
-        MatMulLeftInput_QK:
-          input_activations:
-            num_bits: 8
-            symmetric: true
-        MatMulLeftInput_PV:
-          input_activations:
-            num_bits: 8
-            symmetric: true
-        # For the embeddings, only weight-quantization makes sense
-        Embedding:
-          input_activations: null
-          weights:
-            num_bits: 8
-            symmetric: false
-    SparseGPTModifier:
-      sparsity: 0.5
-      block_size: 128
-      sequential_update: true
-      quantize: true
-      percdamp: 0.01
-      mask_structure: "0:0"
-      targets: ["re:model.layers.\\d*$"]

sparse.yaml ADDED Viewed

@@ -0,0 +1,10 @@

+test_stage:
+  obcq_modifiers:
+    SparseGPTModifier:
+      sparsity: 0.5
+      block_size: 128
+      sequential_update: true
+      quantize: false
+      percdamp: 0.01
+      mask_structure: "0:0"
+      targets: ["re:model.layers.\\d*$"]