From 6d70a386fb329a0fb4db11ba52ccc575039ff4d6 Mon Sep 17 00:00:00 2001 From: lvliang-intel Date: Mon, 23 Mar 2026 13:35:23 +0800 Subject: [PATCH 1/3] Fix AutoScheme low memory flag propagation from CLI Signed-off-by: lvliang-intel --- auto_round/__main__.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/auto_round/__main__.py b/auto_round/__main__.py index 9b98b857e..411185ac2 100644 --- a/auto_round/__main__.py +++ b/auto_round/__main__.py @@ -140,7 +140,7 @@ def __init__(self, *args, **kwargs): "Useful when working with large models that don't fit in GPU memory.", ) basic.add_argument( - "--low_cpu_mem_usage", action="store_true", help="Deprecated, Lower CPU memory mode. Defaults to False." + "--low_cpu_mem_usage", action="store_true", help="Deprecated, Lower CPU memory mode. Defaults to True." ) basic.add_argument( "--disable_low_cpu_mem_usage", action="store_true", help="disable lower CPU memory mode. Defaults to False." @@ -676,6 +676,10 @@ def tune(args): layer_config = parse_layer_config_arg(args.layer_config) args.layer_config = layer_config + low_cpu_mem_usage = True + if args.disable_low_cpu_mem_usage: + low_cpu_mem_usage = False + if args.avg_bits is not None: if args.options is None: raise ValueError("please set --options for auto scheme") @@ -684,10 +688,9 @@ def tune(args): avg_bits=args.avg_bits, shared_layers=args.shared_layers, ignore_scale_zp_bits=args.ignore_scale_zp_bits, + low_gpu_mem_usage=args.low_gpu_mem_usage, + low_cpu_mem_usage=low_cpu_mem_usage, ) - low_cpu_mem_usage = True - if args.disable_low_cpu_mem_usage: - low_cpu_mem_usage = False autoround: BaseCompressor = AutoRound( model=model_name, From 555d2a67d908184a03f61dc808417130a46c185e Mon Sep 17 00:00:00 2001 From: Liang Lv Date: Mon, 23 Mar 2026 14:20:31 +0800 Subject: [PATCH 2/3] Update auto_round/__main__.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- auto_round/__main__.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/auto_round/__main__.py b/auto_round/__main__.py index 411185ac2..434f9825e 100644 --- a/auto_round/__main__.py +++ b/auto_round/__main__.py @@ -140,10 +140,21 @@ def __init__(self, *args, **kwargs): "Useful when working with large models that don't fit in GPU memory.", ) basic.add_argument( - "--low_cpu_mem_usage", action="store_true", help="Deprecated, Lower CPU memory mode. Defaults to True." + "--low_cpu_mem_usage", + action="store_true", + help=( + "Deprecated: low CPU memory mode is enabled by default. " + "This flag is kept only for backward compatibility and has no effect " + "beyond explicitly re-enabling the default behavior." + ), ) basic.add_argument( - "--disable_low_cpu_mem_usage", action="store_true", help="disable lower CPU memory mode. Defaults to False." + "--disable_low_cpu_mem_usage", + action="store_true", + help=( + "Disable low CPU memory mode. " + "Use this flag to turn off the default low CPU memory behavior." + ), ) basic.add_argument( "--format", From efa033f2adf29870cdcdb2ba18a60f60c5659123 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 23 Mar 2026 06:20:54 +0000 Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- auto_round/__main__.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/auto_round/__main__.py b/auto_round/__main__.py index 434f9825e..057641bdf 100644 --- a/auto_round/__main__.py +++ b/auto_round/__main__.py @@ -151,10 +151,7 @@ def __init__(self, *args, **kwargs): basic.add_argument( "--disable_low_cpu_mem_usage", action="store_true", - help=( - "Disable low CPU memory mode. " - "Use this flag to turn off the default low CPU memory behavior." - ), + help=("Disable low CPU memory mode. " "Use this flag to turn off the default low CPU memory behavior."), ) basic.add_argument( "--format",