fxmarty
/

llama-tiny-w-fp8-a-fp8-o-fp8

Safetensors

llama

quark

Model card Files Files and versions

xet

Community

fxmarty committed on Oct 7, 2024

Commit

e489f22

verified ·

1 Parent(s): b6d7ba3

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

config.json +46 -6

config.json CHANGED Viewed

@@ -21,7 +21,10 @@
   "pad_token_id": -1,
   "pretraining_tp": 1,
   "quantization_config": {
-    "activation_scheme": "static",
     "export": {
       "kv_cache_group": [],
       "pack_method": "reorder",
@@ -38,11 +41,48 @@
         ]
       ]
     },
-    "ignored_layers": [
-      "lm_head"
-    ],
-    "kv_cache_scheme": null,
-    "quant_method": "fp8"
   },
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,

   "pad_token_id": -1,
   "pretraining_tp": 1,
   "quantization_config": {
+    "algo_config": null,
+    "exclude": [
+      "lm_head"
+    ],
     "export": {
       "kv_cache_group": [],
       "pack_method": "reorder",
         ]
       ]
     },
+    "global_quant_config": {
+      "bias": null,
+      "input_tensors": {
+        "ch_axis": null,
+        "dtype": "fp8_e4m3",
+        "group_size": null,
+        "is_dynamic": false,
+        "observer_cls": "PerTensorMinMaxObserver",
+        "qscheme": "per_tensor",
+        "round_method": null,
+        "scale_type": null,
+        "symmetric": null
+      },
+      "output_tensors": {
+        "ch_axis": null,
+        "dtype": "fp8_e4m3",
+        "group_size": null,
+        "is_dynamic": false,
+        "observer_cls": "PerTensorMinMaxObserver",
+        "qscheme": "per_tensor",
+        "round_method": null,
+        "scale_type": null,
+        "symmetric": null
+      },
+      "target_device": null,
+      "weight": {
+        "ch_axis": null,
+        "dtype": "fp8_e4m3",
+        "group_size": null,
+        "is_dynamic": false,
+        "observer_cls": "PerTensorMinMaxObserver",
+        "qscheme": "per_tensor",
+        "round_method": null,
+        "scale_type": null,
+        "symmetric": null
+      }
+    },
+    "layer_quant_config": {},
+    "layer_type_quant_config": {},
+    "pack_method": "reorder",
+    "quant_method": "quark",
+    "quant_mode": 1
   },
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,