From f27952269ddd0c6596d91358114c298282faadab Mon Sep 17 00:00:00 2001 From: Zhiyu Cheng Date: Wed, 25 Mar 2026 18:47:58 -0700 Subject: [PATCH 1/2] refine _extract_layer_prefixes to better handle mtp modules Signed-off-by: Zhiyu Cheng --- examples/llm_ptq/example_utils.py | 5 +++++ examples/llm_ptq/hf_ptq.py | 9 +++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/examples/llm_ptq/example_utils.py b/examples/llm_ptq/example_utils.py index 7496229a78..4ed2c1e106 100755 --- a/examples/llm_ptq/example_utils.py +++ b/examples/llm_ptq/example_utils.py @@ -364,6 +364,11 @@ def _extract_layer_prefixes(keys): mtp_layer_prefixes = set() for key in keys: parts = key.split(".") + # Capture the top-level MTP module prefix (e.g., "mtp" from "mtp.fc.weight") + # so that non-layer MTP weights like mtp.fc, mtp.norm are also excluded + if parts: + mtp_layer_prefixes.add(parts[0]) + # Also capture specific layer prefixes (e.g., "mtp.layers.0") for i, part in enumerate(parts): if part == "layers" and i + 1 < len(parts) and parts[i + 1].isdigit(): prefix = ".".join(parts[: i + 2]) diff --git a/examples/llm_ptq/hf_ptq.py b/examples/llm_ptq/hf_ptq.py index b81dc60c01..6c8a7597fe 100755 --- a/examples/llm_ptq/hf_ptq.py +++ b/examples/llm_ptq/hf_ptq.py @@ -979,8 +979,13 @@ def quantize_main( if mtp_layer_prefixes: quant_cfg = copy.deepcopy(quant_cfg) for prefix in mtp_layer_prefixes: - # Add exclusion pattern for this MTP layer (e.g., "*layers.92*") - pattern = f"*{prefix.split('.')[-2]}.{prefix.split('.')[-1]}*" + parts = prefix.split(".") + if len(parts) >= 2: + # Multi-component prefix (e.g., "mtp.layers.0" -> "*layers.0*") + pattern = f"*{parts[-2]}.{parts[-1]}*" + else: + # Single-component prefix (e.g., "mtp" -> "*mtp*") + pattern = f"*{prefix}*" quant_cfg["quant_cfg"][pattern] = {"enable": False} print(f"Excluding MTP layer from quantization: {pattern}") From 3d9d337c658cbdcd433cd6d3e1510fdc358f40c0 Mon Sep 17 00:00:00 2001 From: Zhiyu Cheng Date: Mon, 6 Apr 
2026 11:39:43 -0700 Subject: [PATCH 2/2] use the full MTP prefix as the quantization exclusion pattern Signed-off-by: Zhiyu Cheng --- examples/llm_ptq/hf_ptq.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/examples/llm_ptq/hf_ptq.py b/examples/llm_ptq/hf_ptq.py index 6c8a7597fe..a1f7d0808a 100755 --- a/examples/llm_ptq/hf_ptq.py +++ b/examples/llm_ptq/hf_ptq.py @@ -979,13 +979,7 @@ def quantize_main( if mtp_layer_prefixes: quant_cfg = copy.deepcopy(quant_cfg) for prefix in mtp_layer_prefixes: - parts = prefix.split(".") - if len(parts) >= 2: - # Multi-component prefix (e.g., "mtp.layers.0" -> "*layers.0*") - pattern = f"*{parts[-2]}.{parts[-1]}*" - else: - # Single-component prefix (e.g., "mtp" -> "*mtp*") - pattern = f"*{prefix}*" + pattern = f"*{prefix}*" quant_cfg["quant_cfg"][pattern] = {"enable": False} print(f"Excluding MTP layer from quantization: {pattern}")