@@ -56,7 +56,7 @@ def choose_qparams_stretched_affine(
 
     scale = max_val / quant_max
     scale = scale.to(dtype=scale_dtype, device=input_float.device)
-    zero_point = torch.full_like(scale, 0.5, dtype=zero_point_dtype)
+    zero_point = torch.full_like(scale, -0.5, dtype=zero_point_dtype)
     return scale, zero_point
 
 
@@ -95,34 +95,12 @@ def quantize_stretched_affine(
     max_val = scale.mul(quant_max)
     input_float = input_float.clamp(min=-max_val, max=max_val)
     with torch.no_grad():
-        quant = torch.round(input_float / scale - zero_point)
+        # difference from quantize_affine: add zero_point before rounding
+        quant = torch.round(input_float / scale + zero_point)
     quant = quant.to(dtype=target_dtype).view(original_shape)
     return quant
 
 
-def dequantize_stretched_affine(
-    data: torch.Tensor,
-    block_size: Tuple[int, ...],
-    scale: torch.Tensor,
-    zero_point: torch.Tensor,
-    data_dtype: torch.dtype,
-    quant_min: Optional[int] = None,
-    quant_max: Optional[int] = None,
-    output_dtype: torch.dtype = torch.float32,
-) -> torch.Tensor:
-    # allow float data_dtype instead of restricting to _SUB_BYTE_UINT_BOUNDS
-    return dequantize_affine(
-        data,
-        block_size,
-        scale,
-        -zero_point,
-        data_dtype,
-        quant_min=quant_min,
-        quant_max=quant_max,
-        output_dtype=output_dtype,
-    )
-
-
 class StretchedAffineQuantizedTensor(AffineQuantizedTensor):
     @classmethod
     def from_hp_to_intx(
@@ -184,7 +162,7 @@ def dequantize(self, output_dtype: Optional[torch.dtype] = None) -> torch.Tensor
         )
 
         data, scale, zero_point = self.tensor_impl.get_plain()
-        dq = dequantize_stretched_affine(
+        dq = dequantize_affine(
             data,
             self.block_size,
             scale,
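For context, the sign flip keeps the quantize/dequantize pair consistent with the plain affine primitives: with zero_point = -0.5, round(x / scale + zero_point) matches the old round(x / scale - 0.5), and dequantize_affine applied to the same zero_point reproduces what the removed wrapper computed by negating a +0.5 zero point. A minimal sketch of that round trip in plain torch (hypothetical values; assumes dequantize_affine effectively computes (q - zero_point) * scale here, and does not call the library API):

import torch

# hypothetical 2-bit "stretched" range: codes map to {-1.5, -0.5, 0.5, 1.5}
quant_max = 1.5
x = torch.tensor([-0.9, -0.2, 0.3, 0.8])

scale = x.abs().max() / quant_max
zero_point = torch.full_like(scale, -0.5)  # new convention from this diff

max_val = scale * quant_max
xc = x.clamp(min=-max_val, max=max_val)
q = torch.round(xc / scale + zero_point)   # == round(x / scale - 0.5)
dq = (q - zero_point) * scale              # assumed dequantize_affine behavior

# the old pair (zero_point = +0.5, subtract before rounding, negate on dequant)
# yields the same codes and reconstruction, so the dedicated wrapper is redundant
q_old = torch.round(xc / scale - 0.5)
dq_old = (q_old + 0.5) * scale
assert torch.equal(q, q_old) and torch.allclose(dq, dq_old)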