lym00
/

nunchaku_svdquant_deepcompressor_0.1.0_quantization_flux.1_kontext_dev_test

Model card Files Files and versions

lym00 committed on Jul 23

Commit

b1d4119

·

verified ·

1 Parent(s): d6f7ae7

Update README.md

Files changed (1) hide show

README.md +62 -0

README.md CHANGED Viewed

@@ -416,6 +416,68 @@ def collect(config: DiffusionPtqRunConfig, dataset: datasets.Dataset):
 Potential Fix: deepcompressor.quantizer.impl.scale.py
 ```python
     def quantize(
         self,
         *,

 Potential Fix: deepcompressor.quantizer.impl.scale.py
 ```python
+def quantize_scale(
+    s: torch.Tensor,
+    /,
+    *,
+    quant_dtypes: tp.Sequence[QuantDataType],
+    quant_spans: tp.Sequence[float],
+    view_shapes: tp.Sequence[torch.Size],
+) -> QuantScale:
+    """Quantize the scale tensor.
+    Args:
+        s (`torch.Tensor`):
+            The scale tensor.
+        quant_dtypes (`Sequence[QuantDataType]`):
+            The quantization dtypes of the scale tensor.
+        quant_spans (`Sequence[float]`):
+            The quantization spans of the scale tensor.
+        view_shapes (`Sequence[torch.Size]`):
+            The view shapes of the scale tensor.
+    Returns:
+        `QuantScale`:
+            The quantized scale tensor.
+    """
+    # Add validation at the start
+    if s.numel() == 0:
+        raise ValueError("Input tensor is empty")
+    if s.isnan().any() or s.isinf().any():
+        raise ValueError("Input tensor contains NaN or Inf values")
+    if (s == 0).all():
+        raise ValueError("Input tensor contains all zeros")
+    # Add meta tensor check before any operations
+    if s.is_meta:
+        raise RuntimeError("Cannot quantize scale with meta tensor. Ensure model is loaded on actual device.")
+    # Existing validation
+    if s.isnan().any() or s.isinf().any():
+        raise ValueError("Input tensor contains NaN or Inf values")
+    scale = QuantScale()
+    s = s.abs()
+    for view_shape, quant_dtype, quant_span in zip(view_shapes[:-1], quant_dtypes[:-1], quant_spans[:-1], strict=True):
+        s = s.view(view_shape)  # (#g0, rs0, #g1, rs1, #g2, rs2, ...)
+        ss = s.amax(dim=list(range(1, len(view_shape), 2)), keepdim=True)  # i.e., s_dynamic_span
+        ss = simple_quantize(
+            ss / quant_span, has_zero_point=False, quant_dtype=quant_dtype
+        )  # i.e., s_scale = s_dynamic_span / s_quant_span
+        s = s / ss
+        scale.append(ss)
+    view_shape = view_shapes[-1]
+    s = s.view(view_shape)
+    if any(v != 1 for v in view_shape[1::2]):
+        ss = s.amax(dim=list(range(1, len(view_shape), 2)), keepdim=True)
+        ss = simple_quantize(ss / quant_spans[-1], has_zero_point=False, quant_dtype=quant_dtypes[-1])
+    else:
+        assert quant_spans[-1] == 1, "The last quant span must be 1."
+        ss = simple_quantize(s, has_zero_point=False, quant_dtype=quant_dtypes[-1])
+    scale.append(ss)
+    scale.remove_zero()
+    return scale
     def quantize(
         self,
         *,