Update README.md
README.md (CHANGED)
@@ -62,7 +62,7 @@ Model Path: https://github.com/nunchaku-tech/deepcompressor/issues/70#issuecomme

Save model: `--save-model true` or `--save-model /PATH/TO/CHECKPOINT/DIR`

Example: `python -m deepcompressor.app.diffusion.ptq svdq/flux.1-kontext-dev.yaml examples/diffusion/configs/svdquant/nvfp4.yaml --pipeline-path svdq/flux.1-kontext-dev/ --save-model ~/svdq/`
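As a quick sanity check after the run, you can list what actually landed in the checkpoint directory. This is a generic sketch rather than part of deepcompressor; the `~/svdq` path simply mirrors the example command above, and the exact file names depend on your config.

```python
import os

# Directory passed via --save-model in the example command above.
ckpt_dir = os.path.expanduser("~/svdq")

# Walk the checkpoint directory and print every saved file with its size.
for root, _dirs, files in os.walk(ckpt_dir):
    for name in sorted(files):
        path = os.path.join(root, name)
        print(f"{path}  ({os.path.getsize(path) / 1e6:.1f} MB)")
```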

Model Files Structure

@@ -411,20 +411,129 @@ def collect(config: DiffusionPtqRunConfig, dataset: datasets.Dataset):

caches.clear()
```

4) RuntimeError: Tensor.item() cannot be called on meta tensors

Potential Fix: `deepcompressor/quantizer/impl/scale.py`

```python
def quantize(
    self,
    *,
    # scale-based quantization related arguments
    scale: torch.Tensor | None = None,
    zero: torch.Tensor | None = None,
    # range-based quantization related arguments
    tensor: torch.Tensor | None = None,
    dynamic_range: DynamicRange | None = None,
) -> tuple[QuantScale, torch.Tensor]:
    """Get the quantization scale and zero point of the tensor to be quantized.

    Args:
        scale (`torch.Tensor` or `None`, *optional*, defaults to `None`):
            The scale tensor.
        zero (`torch.Tensor` or `None`, *optional*, defaults to `None`):
            The zero point tensor.
        tensor (`torch.Tensor` or `None`, *optional*, defaults to `None`):
            The tensor to be quantized. This is only used for range-based quantization.
        dynamic_range (`DynamicRange` or `None`, *optional*, defaults to `None`):
            The dynamic range of the tensor to be quantized.

    Returns:
        `tuple[QuantScale, torch.Tensor]`:
            The scale and the zero point.
    """
    # region step 1: get the dynamic span for range-based scale or the scale tensor
    if scale is None:
        range_based = True
        assert isinstance(tensor, torch.Tensor), "View tensor must be a tensor."
        dynamic_range = dynamic_range or DynamicRange()
        dynamic_range = dynamic_range.measure(
            tensor.view(self.tensor_view_shape),
            zero_domain=self.tensor_zero_domain,
            is_float_point=self.tensor_quant_dtype.is_float_point,
        )
        dynamic_range = dynamic_range.intersect(self.tensor_range_bound)
        dynamic_span = (dynamic_range.max - dynamic_range.min) if self.has_zero_point else dynamic_range.max
    else:
        range_based = False
        scale = scale.view(self.scale_view_shapes[-1])
        assert isinstance(scale, torch.Tensor), "Scale must be a tensor."
    # endregion
    # region step 2: get the scale
    if self.linear_scale_quant_dtypes:
        if range_based:
            linear_scale = dynamic_span / self.linear_tensor_quant_span
        elif self.exponent_scale_quant_dtypes:
            linear_scale = scale.mul(self.exponent_tensor_quant_span).div(self.linear_tensor_quant_span)
        else:
            linear_scale = scale
        lin_s = quantize_scale(
            linear_scale,
            quant_dtypes=self.linear_scale_quant_dtypes,
            quant_spans=self.linear_scale_quant_spans,
            view_shapes=self.linear_scale_view_shapes,
        )
        assert lin_s.data is not None, "Linear scale tensor is None."
        # Skip the NaN/Inf checks for meta tensors: evaluating the assert needs a
        # Python bool, i.e. Tensor.item(), which cannot be called on meta tensors.
        if not lin_s.data.is_meta:
            assert not lin_s.data.isnan().any(), "Linear scale tensor contains NaN."
            assert not lin_s.data.isinf().any(), "Linear scale tensor contains Inf."
    else:
        lin_s = QuantScale()
    if self.exponent_scale_quant_dtypes:
        if range_based:
            exp_scale = dynamic_span / self.exponent_tensor_quant_span
        else:
            exp_scale = scale
        if lin_s.data is not None:
            lin_s.data = lin_s.data.expand(self.linear_scale_view_shapes[-1]).reshape(self.scale_view_shapes[-1])
            exp_scale = exp_scale / lin_s.data
        exp_s = quantize_scale(
            exp_scale,
            quant_dtypes=self.exponent_scale_quant_dtypes,
            quant_spans=self.exponent_scale_quant_spans,
            view_shapes=self.exponent_scale_view_shapes,
        )
        assert exp_s.data is not None, "Exponential scale tensor is None."
        assert not exp_s.data.isnan().any(), "Exponential scale tensor contains NaN."
        assert not exp_s.data.isinf().any(), "Exponential scale tensor contains Inf."
        s = exp_s if lin_s.data is None else lin_s.extend(exp_s)
    else:
        s = lin_s
    assert s.data is not None, "Scale tensor is None."
    assert not s.data.isnan().any(), "Scale tensor contains NaN."
    assert not s.data.isinf().any(), "Scale tensor contains Inf."
    # endregion
    # region step 3: get the zero point
    if self.has_zero_point:
        if range_based:
            if self.tensor_zero_domain == ZeroPointDomain.PreScale:
                zero = self.tensor_quant_range.min - dynamic_range.min / s.data
            else:
                zero = self.tensor_quant_range.min * s.data - dynamic_range.min
        assert isinstance(zero, torch.Tensor), "Zero point must be a tensor."
        z = simple_quantize(zero, has_zero_point=True, quant_dtype=self.zero_quant_dtype)
    else:
        z = torch.tensor(0, dtype=s.data.dtype, device=s.data.device)
    assert not z.isnan().any(), "Zero point tensor contains NaN."
    assert not z.isinf().any(), "Zero point tensor contains Inf."
    # endregion
    return s, z
```
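To see why the `is_meta` guard matters, here is a minimal, self-contained sketch (not from the deepcompressor codebase): the assert forces a `bool()` call on the reduction result, which in turn calls `Tensor.item()`, and that is exactly what meta tensors cannot do.

```python
import torch

# A meta tensor carries shape/dtype/device metadata but has no actual storage.
x = torch.empty(4, device="meta")

try:
    # The assert needs a Python bool, i.e. Tensor.item(),
    # which is not supported for meta tensors.
    assert not x.isnan().any(), "tensor contains NaN"
except RuntimeError as err:
    print(err)  # "Tensor.item() cannot be called on meta tensors"

# Guarding the check, as in the patch above, simply skips value inspection
# for meta tensors while keeping it for real ones.
if not x.is_meta:
    assert not x.isnan().any(), "tensor contains NaN"
```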

References

https://github.com/nunchaku-tech/nunchaku/commit/b99fb8be615bc98c6915bbe06a1e0092cbc074a5
https://github.com/nunchaku-tech/nunchaku/blob/main/examples/flux.1-kontext-dev.py
https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/transformers/transformer_flux.py#L266
https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/flux/pipeline_flux_kontext.py
https://github.com/nunchaku-tech/deepcompressor/issues/91
https://deepwiki.com/nunchaku-tech/deepcompressor

---

# Dependencies