From 7be5bf6bb627f43dbae8529bcdec344789f21d50 Mon Sep 17 00:00:00 2001
From: Brian Dellabetta
Date: Thu, 24 Jul 2025 20:11:41 +0000
Subject: [PATCH 1/5] AWQ allow for activation quantization

Signed-off-by: Brian Dellabetta
---
 src/llmcompressor/modifiers/awq/base.py | 31 +++++++++++++------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/src/llmcompressor/modifiers/awq/base.py b/src/llmcompressor/modifiers/awq/base.py
index 6e533cc1a..3fd3d0eab 100644
--- a/src/llmcompressor/modifiers/awq/base.py
+++ b/src/llmcompressor/modifiers/awq/base.py
@@ -1,5 +1,6 @@
 import inspect
 from typing import Dict, List, Optional, Tuple, Union
+import warnings
 
 import torch
 from compressed_tensors.quantization import (
@@ -183,25 +184,25 @@ def validate_model_after(model: "AWQModifier") -> "AWQModifier":
 
             model._group_size = next(iter(group_size_set))
 
-        in_num_bits_set = set(
+        num_bits_set = set(
             group.input_activations.num_bits
             for group in config.config_groups.values()
             if group.input_activations is not None
+        ).union(
+            set(
+                group.output_activations.num_bits
+                for group in config.config_groups.values()
+                if group.output_activations is not None
+            )
         )
-        assert len(in_num_bits_set) == 0 or in_num_bits_set == {16}, (
-            "AWQ activations must be 16-bit precision, "
-            f"input activations {in_num_bits_set} not allowed"
-        )
-
-        out_num_bits_set = set(
-            group.output_activations.num_bits
-            for group in config.config_groups.values()
-            if group.output_activations is not None
-        )
-        assert len(out_num_bits_set) == 0 or out_num_bits_set == {16}, (
-            "AWQ activations must be 16-bit precision, "
-            f"output activations {out_num_bits_set} not allowed"
-        )
+        if not (len(num_bits_set) == 0 or num_bits_set == {16}):
+            warnings.warn(
+                "A strategy including activation quantization was detected. "
+                "AWQ was originally intended for weight-only quantization. "
+                "Lower-precision activations are an experimental feautre, and "
+                "overall performance may be poor. If it is, consider using "
+                "`W4A16` or `W4A16_ASYM` quantization schemes instead."
+            )
 
         return model
 

From 3662be17cf5313cd353e9cbdbc0dbda83c65ff35 Mon Sep 17 00:00:00 2001
From: Brian Dellabetta
Date: Thu, 24 Jul 2025 20:49:02 +0000
Subject: [PATCH 2/5] remove validation unit test

Signed-off-by: Brian Dellabetta
---
 tests/llmcompressor/modifiers/awq/test_base.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tests/llmcompressor/modifiers/awq/test_base.py b/tests/llmcompressor/modifiers/awq/test_base.py
index a4adfbdac..fe983c882 100644
--- a/tests/llmcompressor/modifiers/awq/test_base.py
+++ b/tests/llmcompressor/modifiers/awq/test_base.py
@@ -117,9 +117,6 @@ def test_set_resolved_mappings():
 
 @pytest.mark.unit
 def test_validate():
-    with pytest.raises(ValidationError):
-        AWQModifier(scheme="W8A8")
-
     with pytest.raises(ValidationError):
         AWQModifier(
             config_groups={

From a777241cb9e580d513ba1b3f0f6c002d01dfc34c Mon Sep 17 00:00:00 2001
From: Brian Dellabetta
Date: Thu, 24 Jul 2025 20:53:52 +0000
Subject: [PATCH 3/5] gemini code review updates

Signed-off-by: Brian Dellabetta
---
 src/llmcompressor/modifiers/awq/base.py | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/src/llmcompressor/modifiers/awq/base.py b/src/llmcompressor/modifiers/awq/base.py
index 3fd3d0eab..6ca97bd6d 100644
--- a/src/llmcompressor/modifiers/awq/base.py
+++ b/src/llmcompressor/modifiers/awq/base.py
@@ -184,17 +184,12 @@ def validate_model_after(model: "AWQModifier") -> "AWQModifier":
 
             model._group_size = next(iter(group_size_set))
 
-        num_bits_set = set(
-            group.input_activations.num_bits
+        num_bits_set = {
+            act.num_bits
             for group in config.config_groups.values()
-            if group.input_activations is not None
-        ).union(
-            set(
-                group.output_activations.num_bits
-                for group in config.config_groups.values()
-                if group.output_activations is not None
-            )
-        )
+            for act in (group.input_activations, group.output_activations)
+            if act is not None
+        }
         if not (len(num_bits_set) == 0 or num_bits_set == {16}):
             warnings.warn(
                 "A strategy including activation quantization was detected. "

From 48dbcb5c859a78229fedd10b582147591b25e704 Mon Sep 17 00:00:00 2001
From: Brian Dellabetta
Date: Thu, 24 Jul 2025 20:54:46 +0000
Subject: [PATCH 4/5] gemini codereview updates

Signed-off-by: Brian Dellabetta
---
 src/llmcompressor/modifiers/awq/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llmcompressor/modifiers/awq/base.py b/src/llmcompressor/modifiers/awq/base.py
index 6ca97bd6d..7d2dad619 100644
--- a/src/llmcompressor/modifiers/awq/base.py
+++ b/src/llmcompressor/modifiers/awq/base.py
@@ -194,7 +194,7 @@ def validate_model_after(model: "AWQModifier") -> "AWQModifier":
             warnings.warn(
                 "A strategy including activation quantization was detected. "
                 "AWQ was originally intended for weight-only quantization. "
-                "Lower-precision activations are an experimental feautre, and "
+                "Lower-precision activations are an experimental feature, and "
                 "overall performance may be poor. If it is, consider using "
                 "`W4A16` or `W4A16_ASYM` quantization schemes instead."
             )

From 3f01afa8a7c2bc8953900578c94bcd4c1a28e7f7 Mon Sep 17 00:00:00 2001
From: Brian Dellabetta
Date: Thu, 24 Jul 2025 21:12:51 +0000
Subject: [PATCH 5/5] stylefixes

Signed-off-by: Brian Dellabetta
---
 src/llmcompressor/modifiers/awq/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llmcompressor/modifiers/awq/base.py b/src/llmcompressor/modifiers/awq/base.py
index 7d2dad619..6dae860ea 100644
--- a/src/llmcompressor/modifiers/awq/base.py
+++ b/src/llmcompressor/modifiers/awq/base.py
@@ -1,6 +1,6 @@
 import inspect
-from typing import Dict, List, Optional, Tuple, Union
 import warnings
+from typing import Dict, List, Optional, Tuple, Union
 
 import torch
 from compressed_tensors.quantization import (