From 74d4f4e57625c29f60ba9037f6b8ac2abe27769a Mon Sep 17 00:00:00 2001
From: lkk12014402 <kaokao.lv@intel.com>
Date: Tue, 7 Apr 2026 09:11:43 +0000
Subject: [PATCH 01/13] fix hadamard transform weight dtype, using float64 as
 default.

Signed-off-by: lkk12014402 <kaokao.lv@intel.com>
---
 auto_round/experimental/transform/apply.py    | 20 +++++++++++++------
 .../experimental/transform/hadamards.py       | 16 ++++++++++++++-
 .../experimental/transform/triton/mxfp4.py    |  5 +++++
 3 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/auto_round/experimental/transform/apply.py b/auto_round/experimental/transform/apply.py
index 6980d75e4..93b306683 100644
--- a/auto_round/experimental/transform/apply.py
+++ b/auto_round/experimental/transform/apply.py
@@ -100,7 +100,6 @@ def _apply_to_module(
             location="input",
             inverse=True,
             device="cpu",
-            precision=module.dtype,
         )
 
         if config.hadamard_type != "random_hadamard":
@@ -115,6 +114,7 @@ def input_hook(self, args):
                 input = args[0]
                 # transform(input)
                 orig_shape = input.shape
+                orig_dtype = input.dtype
                 x_flat = input.contiguous().flatten(end_dim=-2)
                 qdq_input, _ = mxfp4_forward_kernel_wrapper(
                     x_flat,
@@ -122,7 +122,7 @@ def input_hook(self, args):
                         hadamard_weight if hadamard_weight is not None else self.hadamard_matrix.T
                     ),  # this matrix from w_transform, needs transpose
                 )
-                return qdq_input.reshape(orig_shape)
+                return qdq_input.reshape(orig_shape).to(orig_dtype)
 
             # for fused transform + quantization kernel
             module.pre_dequantized_input = True
@@ -135,13 +135,23 @@ def input_hook(self, args):
                 input = args[0]
 
                 ori_shape = input.shape
+                orig_dtype = input.dtype
 
                 if hadamard_weight is not None:
                     input = input.view(-1, hadamard_weight.shape[0])
-                    return _multihead_matmul(input, hadamard_weight.to(input.device)).view(ori_shape)
+                    return (
+                        _multihead_matmul(
+                            input.to(hadamard_weight.dtype),
+                            hadamard_weight.to(input.device)
+                        )
+                    ).view(ori_shape).to(orig_dtype)
                 else:
                     input = input.view(-1, self.hadamard_matrix.shape[0])
-                    return _multihead_matmul(input, self.hadamard_matrix.T).view(ori_shape)
+                    return (
+                        _multihead_matmul(
+                            input.to(self.hadamard_matrix.dtype),
+                            self.hadamard_matrix.T)
+                    ).view(ori_shape).to(orig_dtype)
 
             # for fused transform + quantization kernel
             module.pre_dequantized_input = False
@@ -156,7 +166,6 @@ def input_hook(self, args):
             **config.dict(),
             location="weight",
             device=module.weight.device,
-            precision=module.weight.dtype,
         )
 
         # need save random hadamard matrix needed when inference
@@ -180,7 +189,6 @@ def input_hook(self, args):
                 location="input",
                 inverse=True,
                 device=module.weight.device,
-                precision=module.weight.dtype,
             )
 
             patch_wrapperlinear_to_apply_transform(weight_hadamard_transform, input_hadamard_transform)
diff --git a/auto_round/experimental/transform/hadamards.py b/auto_round/experimental/transform/hadamards.py
index 712232a9a..dea423dd6 100644
--- a/auto_round/experimental/transform/hadamards.py
+++ b/auto_round/experimental/transform/hadamards.py
@@ -34,11 +34,25 @@ def __init__(
         self,
         block_size: int = 32,
         device: torch.device = None,
-        precision: torch.dtype = None,
+        precision: torch.dtype = torch.float64,
         location: str = "weight",
         module_type: type[torch.nn.Module] = torch.nn.Linear,
         inverse: bool = False,
     ):
+        """Initialize a Hadamard transform module.
+
+        Args:
+            block_size: Size of each Hadamard block. The input tensor is reshaped
+                to ``(-1, block_size)`` before applying the transform.
+            device: Device on which to create the Hadamard matrix.
+            precision: Data type used for the Hadamard matrix weights, using float64 as default.
+            location: Target location used by ``apply_transform_weight`` when
+                applying the transform.
+            module_type: Module type associated with the transform application,
+                typically ``torch.nn.Linear``.
+            inverse: Whether to build the inverse form of the transform.
+        """
+
         super().__init__()
         self.size = block_size
         self.scale = 1 / math.sqrt(self.size)
diff --git a/auto_round/experimental/transform/triton/mxfp4.py b/auto_round/experimental/transform/triton/mxfp4.py
index c26413248..8028c167b 100644
--- a/auto_round/experimental/transform/triton/mxfp4.py
+++ b/auto_round/experimental/transform/triton/mxfp4.py
@@ -161,6 +161,11 @@ def mxfp4_forward_kernel_wrapper(
     if hadamard_matrix.device != device:
         hadamard_matrix = hadamard_matrix.to(device)
 
+    dtype = hadamard_matrix.dtype
+
+    if x.dtype != dtype:
+        x = x.to(dtype)
+
     # Make sure inputs are contiguous
     x = x.contiguous()
     hadamard_matrix = hadamard_matrix.contiguous()

From aa06e4391d1be3bff4bfac0f232a13d755bda0e5 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 7 Apr 2026 09:07:25 +0000
Subject: [PATCH 02/13] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 auto_round/experimental/transform/apply.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/auto_round/experimental/transform/apply.py b/auto_round/experimental/transform/apply.py
index 93b306683..2d0d4225d 100644
--- a/auto_round/experimental/transform/apply.py
+++ b/auto_round/experimental/transform/apply.py
@@ -140,18 +140,17 @@ def input_hook(self, args):
                 if hadamard_weight is not None:
                     input = input.view(-1, hadamard_weight.shape[0])
                     return (
-                        _multihead_matmul(
-                            input.to(hadamard_weight.dtype),
-                            hadamard_weight.to(input.device)
-                        )
-                    ).view(ori_shape).to(orig_dtype)
+                        (_multihead_matmul(input.to(hadamard_weight.dtype), hadamard_weight.to(input.device)))
+                        .view(ori_shape)
+                        .to(orig_dtype)
+                    )
                 else:
                     input = input.view(-1, self.hadamard_matrix.shape[0])
                     return (
-                        _multihead_matmul(
-                            input.to(self.hadamard_matrix.dtype),
-                            self.hadamard_matrix.T)
-                    ).view(ori_shape).to(orig_dtype)
+                        (_multihead_matmul(input.to(self.hadamard_matrix.dtype), self.hadamard_matrix.T))
+                        .view(ori_shape)
+                        .to(orig_dtype)
+                    )
 
             # for fused transform + quantization kernel
             module.pre_dequantized_input = False

From 928b155183a11a2950ccb957c5e616b38fa7cd74 Mon Sep 17 00:00:00 2001
From: lkk12014402 <kaokao.lv@intel.com>
Date: Tue, 7 Apr 2026 10:52:10 +0000
Subject: [PATCH 03/13] float32 may be enough for hadamard transform.

Signed-off-by: lkk12014402 <kaokao.lv@intel.com>
---
 auto_round/experimental/transform/hadamards.py      | 2 +-
 auto_round/experimental/transform/utils/hadamard.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/auto_round/experimental/transform/hadamards.py b/auto_round/experimental/transform/hadamards.py
index dea423dd6..79223fae5 100644
--- a/auto_round/experimental/transform/hadamards.py
+++ b/auto_round/experimental/transform/hadamards.py
@@ -34,7 +34,7 @@ def __init__(
         self,
         block_size: int = 32,
         device: torch.device = None,
-        precision: torch.dtype = torch.float64,
+        precision: torch.dtype = torch.float32,
         location: str = "weight",
         module_type: type[torch.nn.Module] = torch.nn.Linear,
         inverse: bool = False,
diff --git a/auto_round/experimental/transform/utils/hadamard.py b/auto_round/experimental/transform/utils/hadamard.py
index 5ec6bccbd..5c7ade385 100644
--- a/auto_round/experimental/transform/utils/hadamard.py
+++ b/auto_round/experimental/transform/utils/hadamard.py
@@ -70,8 +70,8 @@ def random_hadamard_matrix(
     :param gen: Optional generator random values
     :return: randomly generated hadamard matrix
     """
-    Q = torch.randint(low=0, high=2, size=(size,), generator=gen, dtype=dtype)  # cpu
-    Q = Q.to(device=device)
+    Q = torch.randint(low=0, high=2, size=(size,), generator=gen)  # cpu
+    Q = Q.to(device=device, dtype=dtype)
     Q = Q * 2 - 1
     Q = torch.diag(Q)
     return _matmul_hadU(Q)

From c67b95d70466ee78126129ecafd9a0bdbf3b9dbf Mon Sep 17 00:00:00 2001
From: lkk12014402 <kaokao.lv@intel.com>
Date: Wed, 8 Apr 2026 08:56:33 +0000
Subject: [PATCH 04/13] transform weight in-place during auto-round tuning.

Signed-off-by: lkk12014402 <kaokao.lv@intel.com>
---
 auto_round/experimental/transform/apply.py    | 52 ++++++-------
 .../experimental/transform/patch_modules.py   | 78 +++++--------------
 2 files changed, 43 insertions(+), 87 deletions(-)

diff --git a/auto_round/experimental/transform/apply.py b/auto_round/experimental/transform/apply.py
index 2d0d4225d..2a0b88298 100644
--- a/auto_round/experimental/transform/apply.py
+++ b/auto_round/experimental/transform/apply.py
@@ -140,17 +140,18 @@ def input_hook(self, args):
                 if hadamard_weight is not None:
                     input = input.view(-1, hadamard_weight.shape[0])
                     return (
-                        (_multihead_matmul(input.to(hadamard_weight.dtype), hadamard_weight.to(input.device)))
-                        .view(ori_shape)
-                        .to(orig_dtype)
-                    )
+                        _multihead_matmul(
+                            input.to(hadamard_weight.dtype),
+                            hadamard_weight.to(input.device)
+                        )
+                    ).view(ori_shape).to(orig_dtype)
                 else:
                     input = input.view(-1, self.hadamard_matrix.shape[0])
                     return (
-                        (_multihead_matmul(input.to(self.hadamard_matrix.dtype), self.hadamard_matrix.T))
-                        .view(ori_shape)
-                        .to(orig_dtype)
-                    )
+                        _multihead_matmul(
+                            input.to(self.hadamard_matrix.dtype),
+                            self.hadamard_matrix.T)
+                    ).view(ori_shape).to(orig_dtype)
 
             # for fused transform + quantization kernel
             module.pre_dequantized_input = False
@@ -175,30 +176,23 @@ def input_hook(self, args):
 
             patch_quantlinear(config.hadamard_type)
 
-        if need_calibration:
-            # for training, the weight changes with every forward pass
-            # for autoround tuning: patch wrapper linear qdq_weight func
-            from auto_round.experimental.transform.patch_modules import (
-                patch_wrapperlinear_to_apply_transform,
-                patch_wrapperwalayer_forward_to_apply_transform,
-            )
+        # for autoround tuning: weight not tuning
+        # for rtn: weight transformed before saving
+        from auto_round.experimental.transform.patch_modules import (
+            patch_wrapperlinear_to_apply_transform,
+            patch_wrapperwalayer_forward_to_apply_transform,
+        )
 
-            input_hadamard_transform = build_hadamard_transform(
-                **config.dict(),
-                location="input",
-                inverse=True,
-                device=module.weight.device,
-            )
+        input_hadamard_transform = build_hadamard_transform(
+            **config.dict(),
+            location="input",
+            inverse=True,
+            device=module.weight.device,
+        )
 
-            patch_wrapperlinear_to_apply_transform(weight_hadamard_transform, input_hadamard_transform)
-            patch_wrapperwalayer_forward_to_apply_transform(input_hadamard_transform)
+        patch_wrapperlinear_to_apply_transform(weight_hadamard_transform, input_hadamard_transform)
+        patch_wrapperwalayer_forward_to_apply_transform(input_hadamard_transform)
 
-        else:
-            # transform is no longer needed (unfusing is not supported)
-            # delattr(module, transform_name)
-            # fuse transform into weight
-            with torch.no_grad():
-                getattr(module, "weight").copy_(weight_hadamard_transform(module.weight).to(module.weight.device))
 
     else:
         # TODO: apply transform to output/q/k
diff --git a/auto_round/experimental/transform/patch_modules.py b/auto_round/experimental/transform/patch_modules.py
index 934ebea9d..a7f6d1d9e 100644
--- a/auto_round/experimental/transform/patch_modules.py
+++ b/auto_round/experimental/transform/patch_modules.py
@@ -32,67 +32,29 @@ def _qdq_weight_patched(self, value, min_scale, max_scale):
             # keep original behavior for >=16bit to avoid changing semantics unexpectedly
             return orig_qdq_weight(self, value, min_scale, max_scale)
 
-        min_scale.data.clamp_(0, 1.0)
-        max_scale.data.clamp_(0, 1.0)
-
-        weight = self.orig_layer.weight
-        if weight.device.type == "meta":
-            weight = self.orig_layer.get_weight().to(self.device)
-
-        is_conv1d = type(self.orig_layer) == transformers.pytorch_utils.Conv1D
-        if is_conv1d:
-            weight = weight.t()
-
-        weight = weight.to(self.device)
-
-        weight_t = w_transform(weight)
-
-        quant_kwargs = {}
-        if hasattr(self.orig_layer, "super_bits"):
-            quant_kwargs["super_bits"] = self.orig_layer.super_bits
-            quant_kwargs["super_group_size"] = self.orig_layer.super_group_size
-
-        weight_q, scale, zp = self.weight_quant_func(
-            weight_t,
-            bits=self.orig_layer.bits,
-            group_size=self.orig_layer.group_size,
-            v=value,
-            min_scale=min_scale,
-            max_scale=max_scale,
-            scale_dtype=self.orig_layer.scale_dtype,
-            tensor_min=self.weight_min,
-            tensor_max=self.weight_max,
-            data_type=self.data_type,
-            q_scale_thresh=self.q_scale_thresh,
-            imatrix=self.orig_layer.imatrix.to(self.device) if hasattr(self.orig_layer, "imatrix") else None,
-            global_scale=getattr(self, "weight_global_scale", None),
-            **quant_kwargs,
-        )
-
-        weight_q = weight_q.to(dtype=weight.dtype)
-
-        if is_conv1d:
-            weight_q = weight_q.t()
-
-        return weight_q, scale, zp
-
+        if getattr(self, "applied_weight_hadamard", None) is None:  # apply the weight transform only once
+            with torch.no_grad():
+                weight = self.orig_layer.weight
+                if weight.device.type == "meta":
+                    weight = self.orig_layer.get_weight().to(self.device)
+
+                is_conv1d = type(self.orig_layer) == transformers.pytorch_utils.Conv1D
+                if is_conv1d:
+                    weight = weight.t().contiguous()  # transpose before the transform for Conv1D layers
+                new_weight = w_transform(weight)
+                if is_conv1d:
+                    new_weight = new_weight.t().contiguous()  # transpose the *transformed* weight back
+                self.orig_layer.weight.data.copy_(new_weight)
+                self.applied_weight_hadamard = True
+
+        return orig_qdq_weight(self, value, min_scale, max_scale)
+
+    orig_qdq_act = WrapperLinear._qdq_act
     def _qdq_act_patched(self, x, act_max_scale, act_max=None):
 
-        # transform = getattr(self.orig_layer, transform_attr)
         x = inp_transform(x)
-        act_max_scale.data.clamp_(0, 1.0)
-        x, scale, zp = self.act_quant_func(
-            x,
-            bits=self.orig_layer.act_bits,
-            group_size=self.orig_layer.act_group_size,
-            scale_dtype=self.orig_layer.scale_dtype,
-            q_scale_thresh=self.q_scale_thresh,
-            data_type=self.act_data_type,
-            max_scale=act_max_scale,
-            tensor_max=act_max,
-            global_scale=getattr(self, "input_global_scale", None),
-        )
-        return x, scale, zp
+
+        return orig_qdq_act(self, x, act_max_scale, act_max)
 
     WrapperLinear._qdq_weight = _qdq_weight_patched
     WrapperLinear._qdq_act = _qdq_act_patched

From 4700eb234ebaf2560638c18ce3d536c68babfd7f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 8 Apr 2026 08:51:08 +0000
Subject: [PATCH 05/13] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 auto_round/experimental/transform/apply.py     | 18 ++++++++----------
 .../experimental/transform/patch_modules.py    |  1 +
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/auto_round/experimental/transform/apply.py b/auto_round/experimental/transform/apply.py
index 2a0b88298..d90aa9b2f 100644
--- a/auto_round/experimental/transform/apply.py
+++ b/auto_round/experimental/transform/apply.py
@@ -140,18 +140,17 @@ def input_hook(self, args):
                 if hadamard_weight is not None:
                     input = input.view(-1, hadamard_weight.shape[0])
                     return (
-                        _multihead_matmul(
-                            input.to(hadamard_weight.dtype),
-                            hadamard_weight.to(input.device)
-                        )
-                    ).view(ori_shape).to(orig_dtype)
+                        (_multihead_matmul(input.to(hadamard_weight.dtype), hadamard_weight.to(input.device)))
+                        .view(ori_shape)
+                        .to(orig_dtype)
+                    )
                 else:
                     input = input.view(-1, self.hadamard_matrix.shape[0])
                     return (
-                        _multihead_matmul(
-                            input.to(self.hadamard_matrix.dtype),
-                            self.hadamard_matrix.T)
-                    ).view(ori_shape).to(orig_dtype)
+                        (_multihead_matmul(input.to(self.hadamard_matrix.dtype), self.hadamard_matrix.T))
+                        .view(ori_shape)
+                        .to(orig_dtype)
+                    )
 
             # for fused transform + quantization kernel
             module.pre_dequantized_input = False
@@ -193,7 +192,6 @@ def input_hook(self, args):
         patch_wrapperlinear_to_apply_transform(weight_hadamard_transform, input_hadamard_transform)
         patch_wrapperwalayer_forward_to_apply_transform(input_hadamard_transform)
 
-
     else:
         # TODO: apply transform to output/q/k
         raise NotImplementedError()
diff --git a/auto_round/experimental/transform/patch_modules.py b/auto_round/experimental/transform/patch_modules.py
index a7f6d1d9e..e099a518d 100644
--- a/auto_round/experimental/transform/patch_modules.py
+++ b/auto_round/experimental/transform/patch_modules.py
@@ -50,6 +50,7 @@ def _qdq_weight_patched(self, value, min_scale, max_scale):
         return orig_qdq_weight(self, value, min_scale, max_scale)
 
     orig_qdq_act = WrapperLinear._qdq_act
+
     def _qdq_act_patched(self, x, act_max_scale, act_max=None):
 
         x = inp_transform(x)

From 43ff2c6703a14cc196d36c1ebd35e78ef5a92591 Mon Sep 17 00:00:00 2001
From: lkk12014402 <kaokao.lv@intel.com>
Date: Thu, 9 Apr 2026 11:00:32 +0000
Subject: [PATCH 06/13] support nvfp4.

Signed-off-by: lkk12014402 <kaokao.lv@intel.com>
---
 auto_round/compressors/base.py                | 10 +--
 auto_round/experimental/qmodules/__init__.py  |  2 +-
 auto_round/experimental/qmodules/nvfp4.py     | 18 ++++
 auto_round/experimental/transform/apply.py    | 12 +--
 .../experimental/transform/hadamards.py       | 18 +++-
 auto_round/experimental/utils.py              | 89 ++++++++++++++-----
 auto_round/inference/backend.py               |  4 +
 auto_round/inference/convert_model.py         |  6 +-
 8 files changed, 120 insertions(+), 39 deletions(-)

diff --git a/auto_round/compressors/base.py b/auto_round/compressors/base.py
index a8735407e..ff6886c97 100644
--- a/auto_round/compressors/base.py
+++ b/auto_round/compressors/base.py
@@ -560,15 +560,11 @@ def __init__(
         # apply hadamard transform
         if hadamard_config:
             from auto_round.experimental.transform.apply import apply_hadamard_transform
-            from auto_round.experimental.utils import check_supported_schemes, normalize_hadamard_config
+            from auto_round.experimental.utils import normalize_hadamard_config
 
-            check_supported_schemes(self.scheme)
+            self.hadamard_config = normalize_hadamard_config(hadamard_config, self.scheme)
+            self.model = apply_hadamard_transform(self.model, self.hadamard_config)
 
-            self.model = apply_hadamard_transform(
-                self.model, hadamard_config, need_calibration=True if self.iters > 0 else False
-            )
-
-            self.hadamard_config = normalize_hadamard_config(hadamard_config)
 
     def _gen_auto_scheme(self) -> dict[str, dict]:
         if self.mllm:
diff --git a/auto_round/experimental/qmodules/__init__.py b/auto_round/experimental/qmodules/__init__.py
index 3862e0293..377784055 100644
--- a/auto_round/experimental/qmodules/__init__.py
+++ b/auto_round/experimental/qmodules/__init__.py
@@ -13,5 +13,5 @@
 # limitations under the License.
 
 from auto_round.experimental.qmodules.mx import MXFP4QuantLinear, MXFP8QuantLinear, HadamardMXFP4QuantLinear
-from auto_round.experimental.qmodules.nvfp4 import NVFP4QuantLinear
+from auto_round.experimental.qmodules.nvfp4 import NVFP4QuantLinear, HadamardNVFP4QuantLinear
 from auto_round.experimental.qmodules.fp8_static import WeightFP8ActFP8StaticQuantLinear
diff --git a/auto_round/experimental/qmodules/nvfp4.py b/auto_round/experimental/qmodules/nvfp4.py
index 81aea8b54..c82846f44 100644
--- a/auto_round/experimental/qmodules/nvfp4.py
+++ b/auto_round/experimental/qmodules/nvfp4.py
@@ -204,3 +204,21 @@ def unpack_data(self, packed_data: torch.Tensor) -> torch.Tensor:
         m, half_n = packed_data.shape
         unpacked_data = unpack_fp4_from_uint8(packed_data, m, half_n * 2, dtype=self.dtype)
         return unpacked_data
+
+
+class HadamardNVFP4QuantLinear(NVFP4QuantLinear):
+    """
+    NVFP4 quantized linear layer that sets ``enable_transform`` and registers a ``hadamard_matrix`` buffer.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.enable_transform = True
+        self.register_buffer(
+            "hadamard_matrix",
+            torch.empty(  # uninitialized; presumably overwritten when a checkpoint is loaded - verify
+                self.group_size,
+                self.group_size,
+                dtype=self.dtype,
+            ),
+        )
diff --git a/auto_round/experimental/transform/apply.py b/auto_round/experimental/transform/apply.py
index d90aa9b2f..4625d4361 100644
--- a/auto_round/experimental/transform/apply.py
+++ b/auto_round/experimental/transform/apply.py
@@ -4,7 +4,7 @@
 import torch
 import tqdm
 
-from auto_round.experimental.qmodules.mx import MXQuantLinearBase
+from auto_round.experimental.qmodules.base import QModuleBase
 from auto_round.experimental.transform.hadamard_config import HadamardConfig
 from auto_round.experimental.transform.hadamards import build_hadamard_transform
 from auto_round.experimental.utils import is_triton_kernel_available, normalize_hadamard_config
@@ -15,10 +15,10 @@
 def apply_hadamard_transform(
     model: torch.nn.Module,
     config: str | dict | HadamardConfig | None,
-    need_calibration: bool = False,
     location: str = "weight",
     use_tqdm=True,
     desc=None,
+    data_type="mx_fp"
 ):
     """
     Apply a transform configuration to a model.
@@ -60,14 +60,14 @@ def apply_hadamard_transform(
     modules_config = [
         (name, module, config)
         for name, module in model.named_modules()
-        if isinstance(module, torch.nn.Linear) or isinstance(module, MXQuantLinearBase)
+        if isinstance(module, torch.nn.Linear) or isinstance(module, QModuleBase)
     ]
 
     desc = f"Applying {config.hadamard_type} transforms" if desc is None else desc
     for name, module, config in tqdm.tqdm(modules_config, desc=desc, disable=(not use_tqdm)):
         if "lm_head" in name:
             continue
-        _apply_to_module(model, module, config, need_calibration, location)
+        _apply_to_module(model, module, config, location, data_type)
 
     # attach config to model for compression/serialization
     setattr(model, "hadamard_config", config)
@@ -79,8 +79,8 @@ def _apply_to_module(
     model: torch.nn.Module,
     module: torch.nn.Module,
     config: HadamardConfig,
-    need_calibration: bool = False,
     location: str = "weight",
+    data_type: str = "mx_fp"
 ):
     """
     Create transforms and apply them to the module
@@ -107,7 +107,7 @@ def _apply_to_module(
         else:
             hadamard_weight = None
 
-        if is_triton_kernel_available():
+        if is_triton_kernel_available(data_type):
             from auto_round.experimental.transform.triton.mxfp4 import mxfp4_forward_kernel_wrapper
 
             def input_hook(self, args):
diff --git a/auto_round/experimental/transform/hadamards.py b/auto_round/experimental/transform/hadamards.py
index 79223fae5..0d29e5cb0 100644
--- a/auto_round/experimental/transform/hadamards.py
+++ b/auto_round/experimental/transform/hadamards.py
@@ -92,10 +92,14 @@ def forward(self, x: torch.Tensor):
 class RandomHadamardTransform(HadamardTransform):
     def __init__(
         self,
-        *args,
+        block_size: int = 32,
+        device: torch.device = None,
+        precision: torch.dtype = None,
+        location: str = "weight",
+        module_type: type[torch.nn.Module] = torch.nn.Linear,
+        inverse: bool = False,
         seed: int | None = None,
         generator: torch.Generator | None = None,
-        **kwargs,
     ):
         if generator is not None:
             self.generator = generator
@@ -103,7 +107,15 @@ def __init__(
             self.generator = torch.Generator()
             if seed is not None:
                 self.generator.manual_seed(seed)
-        super().__init__(*args, **kwargs)
+
+        super().__init__(
+            block_size=block_size,
+            device=device,
+            precision=precision,
+            location=location,
+            module_type=module_type,
+            inverse=inverse,
+        )
 
     def _create_weight(
         self,
diff --git a/auto_round/experimental/utils.py b/auto_round/experimental/utils.py
index 39a7ff135..1e9b8edb7 100644
--- a/auto_round/experimental/utils.py
+++ b/auto_round/experimental/utils.py
@@ -20,7 +20,7 @@
 from auto_round.experimental.transform.hadamards import HADAMARDS
 from auto_round.utils import logger
 
-SUPPORTED_QUANTIZATION_SCHEMES = ["MXFP4"]
+SUPPORTED_QUANTIZATION_SCHEMES = ["MXFP4", "NVFP4"]
 
 
 def per_tensor_fp8_qdq(
@@ -114,10 +114,12 @@ def clean_model_parameters_and_buffers_(model: torch.nn.Module, name_tuple: tupl
         _clean_param_or_buff_if_exists(module, name_tuple)
 
 
-def is_triton_kernel_available() -> bool:
+def is_triton_kernel_available(data_type: str) -> bool:
     """
     Best-effort check for whether Triton kernel path can be used.
     """
+    if is_nv_fp(data_type):
+        return False
     try:
         import triton  # pylint: disable=E0401
     except Exception:
@@ -134,62 +136,107 @@ def is_triton_kernel_available() -> bool:
     return True
 
 
-def normalize_hadamard_config(hadamard_config: str | dict | HadamardConfig | None) -> dict[str, Any]:
+def normalize_hadamard_config(
+    hadamard_config: str | dict | HadamardConfig | None, scheme: str
+) -> dict[str, Any]:
     """
     Normalize and validate `hadamard_config`.
 
     Supported input types:
-        - None          -> {}
-        - dict          -> validated via HadamardConfig
+        - None           -> {}
+        - dict           -> validated via HadamardConfig
         - HadamardConfig -> validated & converted to dict
-        - str           -> shorthand for `transform_type` in TRANSFORMS keys
-
-    On any validation failure, raises ValueError/TypeError.
+        - str            -> shorthand for `hadamard_type` in HADAMARDS keys
+
+    Additional behavior:
+        - If block_size is not set:
+            - MXFP4 -> default block_size to 32
+            - NVFP4 -> default block_size to 16
+            - other schemes -> emit a warning
+        - If block_size is set but does not match the recommended value:
+            - MXFP4 expects 32
+            - NVFP4 expects 16
+            - emit a warning
     """
+
+    check_supported_schemes(scheme)
+
+    def _apply_scheme_block_size(cfg_dict: dict[str, Any]) -> dict[str, Any]:
+        block_size = cfg_dict.get("block_size")
+
+        if block_size is None:
+            if scheme == "MXFP4":
+                cfg_dict["block_size"] = 32
+                logger.warning("block_size is not set for scheme 'MXFP4'; defaulting to 32.")
+            elif scheme == "NVFP4":
+                cfg_dict["block_size"] = 16
+                logger.warning("block_size is not set for scheme 'NVFP4'; defaulting to 16.")
+            else:
+                logger.warning(
+                    f"block_size is not set and cannot be inferred for scheme {scheme!r}; "
+                    "please set block_size explicitly in hadamard_config if needed."
+                )
+        else:
+            if scheme == "MXFP4" and block_size != 32:
+                logger.warning(f"scheme is 'MXFP4' but block_size={block_size}; recommended value is 32.")
+            elif scheme == "NVFP4" and block_size != 16:
+                logger.warning(f"scheme is 'NVFP4' but block_size={block_size}; recommended value is 16.")
+
+        return cfg_dict
+
+
     # 1) None -> {}
     if hadamard_config is None:
         return {}
 
     # 2) Already a HadamardConfig instance
     if isinstance(hadamard_config, HadamardConfig):
-        # Ensure it passes its own validation and convert to dict
-        cfg = HadamardConfig.model_validate(hadamard_config).model_dump()
-        return cfg
+        try:
+            cfg_dict = HadamardConfig.model_validate(hadamard_config).model_dump()
+            cfg_dict = _apply_scheme_block_size(cfg_dict)
+            return HadamardConfig.model_validate(cfg_dict).model_dump()
+        except Exception as e:
+            raise ValueError(f"Invalid HadamardConfig: {e}") from e
 
     # 3) dict -> validate via HadamardConfig
     if isinstance(hadamard_config, dict):
         try:
-            cfg = HadamardConfig.model_validate(hadamard_config).model_dump()
+            cfg_dict = HadamardConfig.model_validate(hadamard_config).model_dump()
+            cfg_dict = _apply_scheme_block_size(cfg_dict)
+            return HadamardConfig.model_validate(cfg_dict).model_dump()
         except Exception as e:
             raise ValueError(f"Invalid hadamard_config dict: {e}") from e
-        return cfg
 
-    # 4) str -> shorthand for transform_type
+    # 4) str -> shorthand for hadamard_type
     if isinstance(hadamard_config, str):
         key = hadamard_config.strip()
         if not key:
             return {}
 
         if key == "default":
-            cfg = HadamardConfig()
-            return cfg.model_dump()
+            cfg_dict = HadamardConfig().model_dump()
+            cfg_dict = _apply_scheme_block_size(cfg_dict)
+            try:
+                return HadamardConfig.model_validate(cfg_dict).model_dump()
+            except Exception as e:
+                raise ValueError(f"Invalid default hadamard_config after scheme adjustment: {e}") from e
 
         if key not in HADAMARDS:
             raise ValueError(
-                f"Invalid hadamard_config string: {key!r}. " f"Expected one of {sorted(HADAMARDS.keys())}."
+                f"Invalid hadamard_config string: {key!r}. Expected one of {sorted(HADAMARDS.keys())}."
             )
 
         cfg_dict = {"hadamard_type": key}
+        cfg_dict = _apply_scheme_block_size(cfg_dict)
 
         try:
-            cfg = HadamardConfig.model_validate(cfg_dict).model_dump()
+            return HadamardConfig.model_validate(cfg_dict).model_dump()
         except Exception as e:
             raise ValueError(f"hadamard_config built from string {key!r} is invalid for HadamardConfig: {e}") from e
 
-        return cfg
-
     raise TypeError(
-        "hadamard_config must be one of: None, dict, HadamardConfig, or str " f"(got {type(hadamard_config).__name__})"
+        "hadamard_config must be one of: None, dict, HadamardConfig, or str "
+        f"(got {type(hadamard_config).__name__})"
     )
 
 
diff --git a/auto_round/inference/backend.py b/auto_round/inference/backend.py
index d98545679..609091ce3 100644
--- a/auto_round/inference/backend.py
+++ b/auto_round/inference/backend.py
@@ -781,6 +781,10 @@ def dynamic_import_inference_linear(backend, config):
                 return ar_qmodules.HadamardMXFP4QuantLinear
         return ar_qmodules.MXFP4QuantLinear
     if "torch_nvfp4" in backend:
+        hadamard_config = getattr(config, "hadamard_config", None)
+        if hadamard_config is not None and hadamard_config:
+            if hadamard_config["hadamard_type"] == "random_hadamard":
+                return ar_qmodules.HadamardNVFP4QuantLinear
         return ar_qmodules.NVFP4QuantLinear
 
     if "auto_round_kernel" in backend or "ark" in backend:
diff --git a/auto_round/inference/convert_model.py b/auto_round/inference/convert_model.py
index a5b9096b3..f544ca58e 100644
--- a/auto_round/inference/convert_model.py
+++ b/auto_round/inference/convert_model.py
@@ -687,7 +687,11 @@ def convert_hf_model(model: nn.Module, target_device: str = "cpu") -> tuple[nn.M
             hadamard_type=hadamard_config["hadamard_type"],
         )  # apply to activation
         model = apply_hadamard_transform(
-            model, act_hadamard_config, location="input", desc="Register pre forward hook for hadamard transform"
+            model,
+            act_hadamard_config,
+            location="input",
+            desc="Register pre forward hook for hadamard transform",
+            data_type=quantization_config.data_type
         )
 
     # Suggest a better backend if available

From 36d314d5b6c8eb25c0ef7c89dd5942da73728006 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 9 Apr 2026 10:55:11 +0000
Subject: [PATCH 07/13] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 auto_round/compressors/base.py             |  1 -
 auto_round/experimental/transform/apply.py |  4 ++--
 auto_round/experimental/utils.py           | 12 +++---------
 auto_round/inference/convert_model.py      |  2 +-
 4 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/auto_round/compressors/base.py b/auto_round/compressors/base.py
index ff6886c97..21a56f51b 100644
--- a/auto_round/compressors/base.py
+++ b/auto_round/compressors/base.py
@@ -565,7 +565,6 @@ def __init__(
             self.hadamard_config = normalize_hadamard_config(hadamard_config, self.scheme)
             self.model = apply_hadamard_transform(self.model, self.hadamard_config)
 
-
     def _gen_auto_scheme(self) -> dict[str, dict]:
         if self.mllm:
             logger.info("AutoScheme is not yet supported for multimodal LLMs.")
diff --git a/auto_round/experimental/transform/apply.py b/auto_round/experimental/transform/apply.py
index 4625d4361..1c8e3deef 100644
--- a/auto_round/experimental/transform/apply.py
+++ b/auto_round/experimental/transform/apply.py
@@ -18,7 +18,7 @@ def apply_hadamard_transform(
     location: str = "weight",
     use_tqdm=True,
     desc=None,
-    data_type="mx_fp"
+    data_type="mx_fp",
 ):
     """
     Apply a transform configuration to a model.
@@ -80,7 +80,7 @@ def _apply_to_module(
     module: torch.nn.Module,
     config: HadamardConfig,
     location: str = "weight",
-    data_type: str = "mx_fp"
+    data_type: str = "mx_fp",
 ):
     """
     Create transforms and apply them to the module
diff --git a/auto_round/experimental/utils.py b/auto_round/experimental/utils.py
index 1e9b8edb7..ccd3da0cf 100644
--- a/auto_round/experimental/utils.py
+++ b/auto_round/experimental/utils.py
@@ -136,9 +136,7 @@ def is_triton_kernel_available(data_type: str) -> bool:
     return True
 
 
-def normalize_hadamard_config(
-    hadamard_config: str | dict | HadamardConfig | None, scheme: str
-) -> dict[str, Any]:
+def normalize_hadamard_config(hadamard_config: str | dict | HadamardConfig | None, scheme: str) -> dict[str, Any]:
     """
     Normalize and validate `hadamard_config`.
 
@@ -184,7 +182,6 @@ def _apply_scheme_block_size(cfg_dict: dict[str, Any]) -> dict[str, Any]:
 
         return cfg_dict
 
-
     # 1) None -> {}
     if hadamard_config is None:
         return {}
@@ -222,9 +219,7 @@ def _apply_scheme_block_size(cfg_dict: dict[str, Any]) -> dict[str, Any]:
                 raise ValueError(f"Invalid default hadamard_config after scheme adjustment: {e}") from e
 
         if key not in HADAMARDS:
-            raise ValueError(
-                f"Invalid hadamard_config string: {key!r}. Expected one of {sorted(HADAMARDS.keys())}."
-            )
+            raise ValueError(f"Invalid hadamard_config string: {key!r}. Expected one of {sorted(HADAMARDS.keys())}.")
 
         cfg_dict = {"hadamard_type": key}
         cfg_dict = _apply_scheme_block_size(cfg_dict)
@@ -235,8 +230,7 @@ def _apply_scheme_block_size(cfg_dict: dict[str, Any]) -> dict[str, Any]:
             raise ValueError(f"hadamard_config built from string {key!r} is invalid for HadamardConfig: {e}") from e
 
     raise TypeError(
-        "hadamard_config must be one of: None, dict, HadamardConfig, or str "
-        f"(got {type(hadamard_config).__name__})"
+        "hadamard_config must be one of: None, dict, HadamardConfig, or str " f"(got {type(hadamard_config).__name__})"
     )
 
 
diff --git a/auto_round/inference/convert_model.py b/auto_round/inference/convert_model.py
index f544ca58e..2e2002af0 100644
--- a/auto_round/inference/convert_model.py
+++ b/auto_round/inference/convert_model.py
@@ -691,7 +691,7 @@ def convert_hf_model(model: nn.Module, target_device: str = "cpu") -> tuple[nn.M
             act_hadamard_config,
             location="input",
             desc="Register pre forward hook for hadamard transform",
-            data_type=quantization_config.data_type
+            data_type=quantization_config.data_type,
         )
 
     # Suggest a better backend if available

From 6d69b0e5ef4e8d0c1c752de523b79cb5f192b3a6 Mon Sep 17 00:00:00 2001
From: lkk12014402 <kaokao.lv@intel.com>
Date: Thu, 9 Apr 2026 11:09:28 +0000
Subject: [PATCH 08/13] pass scheme through apply_hadamard_transform

Signed-off-by: lkk12014402 <kaokao.lv@intel.com>
---
 auto_round/compressors/base.py             | 2 +-
 auto_round/experimental/transform/apply.py | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/auto_round/compressors/base.py b/auto_round/compressors/base.py
index 21a56f51b..074ffcf1c 100644
--- a/auto_round/compressors/base.py
+++ b/auto_round/compressors/base.py
@@ -563,7 +563,7 @@ def __init__(
             from auto_round.experimental.utils import normalize_hadamard_config
 
             self.hadamard_config = normalize_hadamard_config(hadamard_config, self.scheme)
-            self.model = apply_hadamard_transform(self.model, self.hadamard_config)
+            self.model = apply_hadamard_transform(self.model, self.hadamard_config, scheme=self.scheme)
 
     def _gen_auto_scheme(self) -> dict[str, dict]:
         if self.mllm:
diff --git a/auto_round/experimental/transform/apply.py b/auto_round/experimental/transform/apply.py
index 1c8e3deef..aeef57023 100644
--- a/auto_round/experimental/transform/apply.py
+++ b/auto_round/experimental/transform/apply.py
@@ -19,6 +19,7 @@ def apply_hadamard_transform(
     use_tqdm=True,
     desc=None,
     data_type="mx_fp",
+    scheme="MXFP4"
 ):
     """
     Apply a transform configuration to a model.
@@ -53,7 +54,7 @@ def apply_hadamard_transform(
         ``config.transform_type``.
     """
 
-    config = normalize_hadamard_config(config)
+    config = normalize_hadamard_config(config, scheme)
     if not isinstance(config, HadamardConfig):
         config = HadamardConfig(**config)
 

From 95436ed7ced33f011181b0a0342f6f34acb293aa Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 9 Apr 2026 11:04:00 +0000
Subject: [PATCH 09/13] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 auto_round/experimental/transform/apply.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/auto_round/experimental/transform/apply.py b/auto_round/experimental/transform/apply.py
index aeef57023..d99e0b928 100644
--- a/auto_round/experimental/transform/apply.py
+++ b/auto_round/experimental/transform/apply.py
@@ -19,7 +19,7 @@ def apply_hadamard_transform(
     use_tqdm=True,
     desc=None,
     data_type="mx_fp",
-    scheme="MXFP4"
+    scheme="MXFP4",
 ):
     """
     Apply a transform configuration to a model.

From c558effce566b44420f8d31e693cc8c5b61df612 Mon Sep 17 00:00:00 2001
From: lkk <33276950+lkk12014402@users.noreply.github.com>
Date: Thu, 9 Apr 2026 19:07:05 +0800
Subject: [PATCH 10/13] add missing is_nv_fp import in experimental/utils

---
 auto_round/experimental/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/auto_round/experimental/utils.py b/auto_round/experimental/utils.py
index ccd3da0cf..92445e7dd 100644
--- a/auto_round/experimental/utils.py
+++ b/auto_round/experimental/utils.py
@@ -15,7 +15,7 @@
 from typing import Any
 
 import torch
-
+from auto_round.compressors.utils import is_nv_fp
 from auto_round.experimental.transform.hadamard_config import HadamardConfig
 from auto_round.experimental.transform.hadamards import HADAMARDS
 from auto_round.utils import logger

From 037aad617c7564c75e738fcd9a03a7083dbe336e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 9 Apr 2026 11:07:38 +0000
Subject: [PATCH 11/13] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 auto_round/experimental/utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/auto_round/experimental/utils.py b/auto_round/experimental/utils.py
index 92445e7dd..7f65a3231 100644
--- a/auto_round/experimental/utils.py
+++ b/auto_round/experimental/utils.py
@@ -15,6 +15,7 @@
 from typing import Any
 
 import torch
+
 from auto_round.compressors.utils import is_nv_fp
 from auto_round.experimental.transform.hadamard_config import HadamardConfig
 from auto_round.experimental.transform.hadamards import HADAMARDS

From 65092ff56e2b5c3aa6b5c402b58f3a2bcc19c92e Mon Sep 17 00:00:00 2001
From: lkk <33276950+lkk12014402@users.noreply.github.com>
Date: Thu, 9 Apr 2026 19:36:15 +0800
Subject: [PATCH 12/13] normalize_hadamard_config: track explicit block_size

---
 auto_round/experimental/utils.py | 54 ++++++++++++++++++++------------
 1 file changed, 34 insertions(+), 20 deletions(-)

diff --git a/auto_round/experimental/utils.py b/auto_round/experimental/utils.py
index 7f65a3231..11759af22 100644
--- a/auto_round/experimental/utils.py
+++ b/auto_round/experimental/utils.py
@@ -137,7 +137,9 @@ def is_triton_kernel_available(data_type: str) -> bool:
     return True
 
 
-def normalize_hadamard_config(hadamard_config: str | dict | HadamardConfig | None, scheme: str) -> dict[str, Any]:
+def normalize_hadamard_config(
+    hadamard_config: str | dict | HadamardConfig | None, scheme: str
+) -> dict[str, Any]:
     """
     Normalize and validate `hadamard_config`.
 
@@ -148,7 +150,7 @@ def normalize_hadamard_config(hadamard_config: str | dict | HadamardConfig | Non
         - str            -> shorthand for `hadamard_type` in HADAMARDS keys
 
     Additional behavior:
-        - If block_size is not set:
+        - If block_size is not set by user:
             - MXFP4 -> default block_size to 32
             - NVFP4 -> default block_size to 16
             - other schemes -> emit a warning
@@ -158,16 +160,18 @@ def normalize_hadamard_config(hadamard_config: str | dict | HadamardConfig | Non
             - emit a warning
     """
 
-    check_supported_schemes(scheme)
+    def _normalize_scheme(s: str) -> str:
+        return s.strip().upper()
 
-    def _apply_scheme_block_size(cfg_dict: dict[str, Any]) -> dict[str, Any]:
+    def _apply_scheme_block_size(cfg_dict: dict[str, Any], block_size_explicitly_set: bool) -> dict[str, Any]:
+        normalized_scheme = _normalize_scheme(scheme)
         block_size = cfg_dict.get("block_size")
 
-        if block_size is None:
-            if scheme == "MXFP4":
+        if not block_size_explicitly_set or block_size is None:
+            if normalized_scheme == "MXFP4":
                 cfg_dict["block_size"] = 32
                 logger.warning("block_size is not set for scheme 'MXFP4'; defaulting to 32.")
-            elif scheme == "NVFP4":
+            elif normalized_scheme == "NVFP4":
                 cfg_dict["block_size"] = 16
                 logger.warning("block_size is not set for scheme 'NVFP4'; defaulting to 16.")
             else:
@@ -176,9 +180,9 @@ def _apply_scheme_block_size(cfg_dict: dict[str, Any]) -> dict[str, Any]:
                     "please set block_size explicitly in hadamard_config if needed."
                 )
         else:
-            if scheme == "MXFP4" and block_size != 32:
+            if normalized_scheme == "MXFP4" and block_size != 32:
                 logger.warning(f"scheme is 'MXFP4' but block_size={block_size}; recommended value is 32.")
-            elif scheme == "NVFP4" and block_size != 16:
+            elif normalized_scheme == "NVFP4" and block_size != 16:
                 logger.warning(f"scheme is 'NVFP4' but block_size={block_size}; recommended value is 16.")
 
         return cfg_dict
@@ -187,20 +191,27 @@ def _apply_scheme_block_size(cfg_dict: dict[str, Any]) -> dict[str, Any]:
     if hadamard_config is None:
         return {}
 
-    # 2) Already a HadamardConfig instance
+    # 2) HadamardConfig instance
     if isinstance(hadamard_config, HadamardConfig):
+        raw_cfg_dict = hadamard_config.model_dump(exclude_unset=True)
+        block_size_explicitly_set = "block_size" in raw_cfg_dict
+
+        cfg_dict = dict(raw_cfg_dict)
+        cfg_dict = _apply_scheme_block_size(cfg_dict, block_size_explicitly_set)
+
         try:
-            cfg_dict = HadamardConfig.model_validate(hadamard_config).model_dump()
-            cfg_dict = _apply_scheme_block_size(cfg_dict)
             return HadamardConfig.model_validate(cfg_dict).model_dump()
         except Exception as e:
             raise ValueError(f"Invalid HadamardConfig: {e}") from e
 
-    # 3) dict -> validate via HadamardConfig
+    # 3) dict
     if isinstance(hadamard_config, dict):
+        block_size_explicitly_set = "block_size" in hadamard_config
+
+        cfg_dict = dict(hadamard_config)
+        cfg_dict = _apply_scheme_block_size(cfg_dict, block_size_explicitly_set)
+
         try:
-            cfg_dict = HadamardConfig.model_validate(hadamard_config).model_dump()
-            cfg_dict = _apply_scheme_block_size(cfg_dict)
             return HadamardConfig.model_validate(cfg_dict).model_dump()
         except Exception as e:
             raise ValueError(f"Invalid hadamard_config dict: {e}") from e
@@ -212,18 +223,20 @@ def _apply_scheme_block_size(cfg_dict: dict[str, Any]) -> dict[str, Any]:
             return {}
 
         if key == "default":
-            cfg_dict = HadamardConfig().model_dump()
-            cfg_dict = _apply_scheme_block_size(cfg_dict)
+            cfg_dict = {}
+            cfg_dict = _apply_scheme_block_size(cfg_dict, block_size_explicitly_set=False)
             try:
                 return HadamardConfig.model_validate(cfg_dict).model_dump()
             except Exception as e:
                 raise ValueError(f"Invalid default hadamard_config after scheme adjustment: {e}") from e
 
         if key not in HADAMARDS:
-            raise ValueError(f"Invalid hadamard_config string: {key!r}. Expected one of {sorted(HADAMARDS.keys())}.")
+            raise ValueError(
+                f"Invalid hadamard_config string: {key!r}. Expected one of {sorted(HADAMARDS.keys())}."
+            )
 
         cfg_dict = {"hadamard_type": key}
-        cfg_dict = _apply_scheme_block_size(cfg_dict)
+        cfg_dict = _apply_scheme_block_size(cfg_dict, block_size_explicitly_set=False)
 
         try:
             return HadamardConfig.model_validate(cfg_dict).model_dump()
@@ -231,7 +244,8 @@ def _apply_scheme_block_size(cfg_dict: dict[str, Any]) -> dict[str, Any]:
             raise ValueError(f"hadamard_config built from string {key!r} is invalid for HadamardConfig: {e}") from e
 
     raise TypeError(
-        "hadamard_config must be one of: None, dict, HadamardConfig, or str " f"(got {type(hadamard_config).__name__})"
+        "hadamard_config must be one of: None, dict, HadamardConfig, or str "
+        f"(got {type(hadamard_config).__name__})"
     )
 
 

From 506c595f0468f7e386764e4bb99d3d0244b400bf Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 9 Apr 2026 11:36:41 +0000
Subject: [PATCH 13/13] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 auto_round/experimental/utils.py | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/auto_round/experimental/utils.py b/auto_round/experimental/utils.py
index 11759af22..b32132c7a 100644
--- a/auto_round/experimental/utils.py
+++ b/auto_round/experimental/utils.py
@@ -137,9 +137,7 @@ def is_triton_kernel_available(data_type: str) -> bool:
     return True
 
 
-def normalize_hadamard_config(
-    hadamard_config: str | dict | HadamardConfig | None, scheme: str
-) -> dict[str, Any]:
+def normalize_hadamard_config(hadamard_config: str | dict | HadamardConfig | None, scheme: str) -> dict[str, Any]:
     """
     Normalize and validate `hadamard_config`.
 
@@ -231,9 +229,7 @@ def _apply_scheme_block_size(cfg_dict: dict[str, Any], block_size_explicitly_set
                 raise ValueError(f"Invalid default hadamard_config after scheme adjustment: {e}") from e
 
         if key not in HADAMARDS:
-            raise ValueError(
-                f"Invalid hadamard_config string: {key!r}. Expected one of {sorted(HADAMARDS.keys())}."
-            )
+            raise ValueError(f"Invalid hadamard_config string: {key!r}. Expected one of {sorted(HADAMARDS.keys())}.")
 
         cfg_dict = {"hadamard_type": key}
         cfg_dict = _apply_scheme_block_size(cfg_dict, block_size_explicitly_set=False)
@@ -244,8 +240,7 @@ def _apply_scheme_block_size(cfg_dict: dict[str, Any], block_size_explicitly_set
             raise ValueError(f"hadamard_config built from string {key!r} is invalid for HadamardConfig: {e}") from e
 
     raise TypeError(
-        "hadamard_config must be one of: None, dict, HadamardConfig, or str "
-        f"(got {type(hadamard_config).__name__})"
+        "hadamard_config must be one of: None, dict, HadamardConfig, or str " f"(got {type(hadamard_config).__name__})"
     )