Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
60 commits
Select commit Hold shift + click to select a range
7698b93
init
n1ck-guo Mar 13, 2026
75b4141
Merge branch 'main' of https://github.com/intel/auto-round into hengg…
n1ck-guo Mar 13, 2026
ca17097
update
n1ck-guo Mar 16, 2026
a092e37
Merge branch 'main' of https://github.com/intel/auto-round into hengg…
n1ck-guo Mar 16, 2026
cec4ce4
Merge branch 'main' of https://github.com/intel/auto-round into hengg…
n1ck-guo Mar 16, 2026
e265b8f
update
n1ck-guo Mar 17, 2026
868a82d
merge main
n1ck-guo Mar 17, 2026
9dc930c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 17, 2026
70a2d02
add switch
n1ck-guo Mar 17, 2026
5998d44
code scan
n1ck-guo Mar 17, 2026
9412596
Merge branch 'hengguo/new_ar_arch' of https://github.com/intel/auto-r…
n1ck-guo Mar 17, 2026
394dcdd
fix
n1ck-guo Mar 17, 2026
7024cad
Merge branch 'main' of https://github.com/intel/auto-round into hengg…
n1ck-guo Mar 18, 2026
36daba0
fix
n1ck-guo Mar 18, 2026
6feed99
fix
n1ck-guo Mar 18, 2026
7bd3e62
fix qweight
n1ck-guo Mar 18, 2026
9b14918
fix ut and refactor code
n1ck-guo Mar 19, 2026
2ab9b51
fix ut
n1ck-guo Mar 19, 2026
dd5aec7
fix
n1ck-guo Mar 20, 2026
d65f1eb
Merge branch 'main' of https://github.com/intel/auto-round into hengg…
n1ck-guo Mar 20, 2026
bde95c6
fix merge
n1ck-guo Mar 20, 2026
7b4e479
fix
n1ck-guo Mar 20, 2026
9b4cab7
update
n1ck-guo Mar 23, 2026
b602e00
merge main
n1ck-guo Mar 23, 2026
a1fe717
sync merge change
n1ck-guo Mar 23, 2026
b58d55a
fix
n1ck-guo Mar 23, 2026
6a7ac60
fix ut
n1ck-guo Mar 27, 2026
64d4a57
Merge branch 'main' of https://github.com/intel/auto-round into hengg…
n1ck-guo Mar 30, 2026
b753bab
decoupling quantization and refactor hadamard
n1ck-guo Mar 30, 2026
b32bc68
support multi rotation
n1ck-guo Mar 30, 2026
dbd1ab0
Merge branch 'main' of https://github.com/intel/auto-round into hengg…
n1ck-guo Mar 30, 2026
f4da8be
sync compressors_new: add is_dynamic_afp8, is_block_wfp8, _get_safete…
n1ck-guo Mar 30, 2026
75a472a
merge main
n1ck-guo Mar 31, 2026
01f6871
fix
n1ck-guo Mar 31, 2026
53bef7c
Merge branch 'main' of https://github.com/intel/auto-round into hengg…
n1ck-guo Mar 31, 2026
20ade76
Merge branch 'main' of https://github.com/intel/auto-round into hengg…
n1ck-guo Mar 31, 2026
41e75bd
fix
n1ck-guo Mar 31, 2026
92139d6
fix
n1ck-guo Mar 31, 2026
166b5b6
fix output dir
n1ck-guo Mar 31, 2026
31b2d2b
update by comment
n1ck-guo Apr 1, 2026
4490a17
Merge branch 'main' of https://github.com/intel/auto-round into hengg…
n1ck-guo Apr 1, 2026
fdc92c2
update
n1ck-guo Apr 2, 2026
fb04613
fix
n1ck-guo Apr 2, 2026
4588279
fix by comment
n1ck-guo Apr 2, 2026
a313c26
fix output_dir
n1ck-guo Apr 2, 2026
19f95ed
fix
n1ck-guo Apr 2, 2026
29d2b64
fix
n1ck-guo Apr 3, 2026
bfec842
merge
n1ck-guo Apr 3, 2026
1c9e529
fix
n1ck-guo Apr 3, 2026
7e7fdeb
fix vlm ut
n1ck-guo Apr 3, 2026
4a035fb
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 7, 2026
463bb6c
fix ut
n1ck-guo Apr 7, 2026
f5d6ff4
Merge branch 'main' of https://github.com/intel/auto-round into hengg…
n1ck-guo Apr 7, 2026
755ab4e
sync merge
n1ck-guo Apr 7, 2026
d661e0b
fix by comment
n1ck-guo Apr 7, 2026
7a80deb
merge
n1ck-guo Apr 7, 2026
08770cf
fix
n1ck-guo Apr 8, 2026
709269a
Merge branch 'main' of https://github.com/intel/auto-round into hengg…
n1ck-guo Apr 8, 2026
97b89dd
fix
n1ck-guo Apr 8, 2026
0025256
performance
n1ck-guo Apr 8, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 4 additions & 34 deletions auto_round/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@


class BasicArgumentParser(argparse.ArgumentParser):

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.add_argument(
Expand Down Expand Up @@ -729,41 +730,10 @@ def tune(args):
trust_remote_code=not args.disable_trust_remote_code,
)

model_name = args.model.rstrip("/")

if model_name.split("/")[-1].strip(".") == "" and "gguf" not in args.format:
if autoround.group_size <= 0:
if "fp" in autoround.act_data_type:
suffix = f"afp{autoround.act_bits}"
else:
suffix = f"a{autoround.act_bits}"
else:
suffix = f"g{autoround.group_size}"
export_dir = os.path.join(args.output_dir, f"w{autoround.bits}{suffix}")
elif model_name.split("/")[-1].strip(".") == "" and "gguf" in args.format:
export_dir = args.output_dir
elif model_name.split("./")[-1].strip("./") != "" and "gguf" in args.format:
export_dir = os.path.join(args.output_dir, model_name.split("/")[-1] + "-gguf")
else:
if isinstance(autoround.group_size, tuple):
assert len(autoround.group_size) == 2, f"Only support 2D group_size, but get {autoround.group_size}"
suffix = f"g{autoround.group_size[0]}x{autoround.group_size[1]}"
else:
if autoround.group_size <= 0:
if "fp" in autoround.act_data_type:
suffix = f"afp{autoround.act_bits}"
else:
suffix = f"a{autoround.act_bits}"
else:
suffix = f"g{autoround.group_size}"
prefix = autoround.data_type.lower().replace("_", "") if "int" not in autoround.data_type else ""
export_dir = os.path.join(
args.output_dir,
model_name.split("/")[-1] + (f"-{prefix}" if prefix else "") + f"-w{autoround.bits}{suffix}",
)

# ======================= Quantize and save model =======================
model, folders = autoround.quantize_and_save(export_dir, format=args.format) # pylint: disable=E1101
# Export directory is now derived automatically inside quantize_and_save via
# BaseCompressor._get_export_dir(), so we only need to pass the base output_dir.
model, folders = autoround.quantize_and_save(args.output_dir, format=args.format) # pylint: disable=E1101
tokenizer = autoround.tokenizer # pylint: disable=E1101

model.eval()
Expand Down
13 changes: 13 additions & 0 deletions auto_round/algorithms/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2026 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
18 changes: 18 additions & 0 deletions auto_round/algorithms/alg_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Copyright (c) 2026 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


class AlgConfig:
    """Base configuration container for algorithm settings.

    Currently a placeholder with no shared state; concrete algorithm
    configurations are expected to extend this class.
    """

    def __init__(self):
        # No common configuration fields yet.
        pass
17 changes: 17 additions & 0 deletions auto_round/algorithms/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright (c) 2026 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


class BaseAlgorithm:
    """Marker base class shared by all algorithm implementations."""
21 changes: 21 additions & 0 deletions auto_round/algorithms/quantization/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Copyright (c) 2026 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from auto_round.algorithms.quantization.base import BaseQuantizers
from auto_round.algorithms.quantization.config import QuantizationConfig
from auto_round.algorithms.quantization.sign_round.config import SignRoundConfig
from auto_round.algorithms.quantization.sign_round.quantizer import SignRoundQuantizer
from auto_round.algorithms.quantization.adam_round.adam import AdamRoundQuantizer
from auto_round.algorithms.quantization.rtn.config import RTNConfig
from auto_round.algorithms.quantization.rtn.quantizer import RTNQuantizer, OptimizedRTNQuantizer
13 changes: 13 additions & 0 deletions auto_round/algorithms/quantization/adam_round/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2026 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
66 changes: 66 additions & 0 deletions auto_round/algorithms/quantization/adam_round/adam.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Copyright (c) 2026 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Union

import torch

from auto_round.algorithms.quantization.sign_round.quantizer import SignRoundQuantizer
from auto_round.schemes import QuantizationScheme
from auto_round.utils import check_is_cpu, htcore, is_hpex_available


class AdamRoundQuantizer(SignRoundQuantizer):
    """Quantizer that tunes rounding parameters with the AdamW optimizer.

    Reuses the training loop from :class:`SignRoundQuantizer` and overrides
    only the optimizer/scaler plumbing. AdamW manages momentum internally,
    so no explicit momentum buffer is kept on the instance.
    """

    def __init__(self, config):
        super().__init__(config)
        self.momentum = None  # AdamW handles momentum internally

    def _get_optimizer(self, optimizer):
        """Resolve *optimizer* to a torch optimizer class.

        Args:
            optimizer: ``None`` (defaults to ``torch.optim.AdamW``), the name
                of a ``torch.optim`` class (e.g. ``"SGD"``), or an optimizer
                class, which is returned unchanged.

        Returns:
            The resolved optimizer class.
        """
        if optimizer is None:
            return torch.optim.AdamW
        if isinstance(optimizer, str):
            # Look up by name, e.g. "SGD" -> torch.optim.SGD.
            return getattr(torch.optim, optimizer)
        return optimizer

    def _get_scaler(self):
        """Return a gradient scaler for AMP training, or ``None``.

        A scaler is created only when AMP is enabled and the target device
        is not CPU (CPU autocast does not need loss scaling).
        """
        if self.model_context.amp and not check_is_cpu(self.compress_context.device):
            # NOTE(review): torch.cuda.amp.GradScaler is deprecated in recent
            # torch releases in favor of torch.amp.GradScaler("cuda", ...);
            # kept as-is for compatibility with the torch version in use.
            from torch.cuda.amp import GradScaler

            return GradScaler(init_scale=1024, growth_interval=100000)
        return None

    def _scale_loss_and_backward(self, scaler, loss):
        """Run backward on *loss*, scaling it first when *scaler* is given.

        Returns the (possibly scaled) loss so callers can log it.
        """
        if scaler is not None:
            loss = scaler.scale(loss)

        loss.backward()
        if is_hpex_available():
            # HPU lazy mode: flush the accumulated graph after backward.
            htcore.mark_step()
        return loss

    def _step(self, scaler, optimizer, lr_schedule):
        """Apply one optimizer step, going through *scaler* when AMP is active."""
        if scaler is not None:
            scaler.step(optimizer)
            optimizer.zero_grad()
            lr_schedule.step()
            # Adjust the loss scale for the next iteration.
            scaler.update()
        else:
            optimizer.step()
            optimizer.zero_grad()
            lr_schedule.step()
        if is_hpex_available():
            htcore.mark_step()
Loading
Loading