[Hardware][CPU] Support AWQ for CPU backend (#7515)
@@ -20,6 +20,7 @@ from vllm.model_executor.layers.quantization.utils.marlin_utils import (
 from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead
 from vllm.model_executor.parameter import (GroupQuantScaleParameter,
                                            PackedvLLMParameter)
+from vllm.platforms import current_platform
 from vllm.scalar_type import scalar_types
 
 logger = init_logger(__name__)
@@ -123,6 +124,9 @@ class AWQMarlinConfig(QuantizationConfig):
         group_size = quant_config.get("group_size")
         has_zp = quant_config.get("zero_point")
 
+        if not current_platform.is_cuda():
+            return False
+
         if quant_method != "awq":
             return False
 
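For context, a minimal, self-contained sketch of the dispatch this gate enables: AWQ-Marlin kernels are CUDA-only, so on the CPU backend the compatibility check now fails early and the generic AWQ path is used instead. `is_awq_marlin_compatible` below mirrors (in simplified form) the check from the diff; `_FakeCpuPlatform` and `choose_awq_backend` are hypothetical stand-ins for illustration, not vLLM APIs.

from typing import Any, Dict


class _FakeCpuPlatform:
    """Hypothetical stand-in for vllm.platforms.current_platform on a CPU host."""

    @staticmethod
    def is_cuda() -> bool:
        return False


current_platform = _FakeCpuPlatform()


def is_awq_marlin_compatible(quant_config: Dict[str, Any]) -> bool:
    """Simplified mirror of the gate added in this commit."""
    quant_method = quant_config.get("quant_method", "").lower()

    # New in this commit: Marlin kernels require CUDA, so bail out
    # early on any other platform (e.g. the CPU backend).
    if not current_platform.is_cuda():
        return False

    if quant_method != "awq":
        return False
    return True


def choose_awq_backend(quant_config: Dict[str, Any]) -> str:
    # Illustrative dispatch (not vLLM's actual selection logic):
    # prefer the fused Marlin kernels when compatible, otherwise
    # fall back to the plain AWQ implementation.
    if is_awq_marlin_compatible(quant_config):
        return "awq_marlin"
    return "awq"


if __name__ == "__main__":
    cfg = {"quant_method": "awq", "bits": 4,
           "group_size": 128, "zero_point": True}
    # On this fake CPU platform the Marlin path is rejected.
    print(choose_awq_backend(cfg))  # -> "awq"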