[Model][Quantization] HQQ support through Marlin kernel expansion (#9766)
Signed-off-by: ElizaWszola <eliza@neuralmagic.com>
@@ -21,6 +21,7 @@ from vllm.model_executor.layers.quantization.gptq_marlin import (
     GPTQMarlinConfig)
 from vllm.model_executor.layers.quantization.gptq_marlin_24 import (
     GPTQMarlin24Config)
+from vllm.model_executor.layers.quantization.hqq_marlin import HQQMarlinConfig
 from vllm.model_executor.layers.quantization.ipex_quant import IPEXConfig
 from vllm.model_executor.layers.quantization.marlin import MarlinConfig
 from vllm.model_executor.layers.quantization.modelopt import ModelOptFp8Config
@@ -48,6 +49,7 @@ QUANTIZATION_METHODS: Dict[str, Type[QuantizationConfig]] = {
     "compressed-tensors": CompressedTensorsConfig,
     "bitsandbytes": BitsAndBytesConfig,
     "qqq": QQQConfig,
+    "hqq": HQQMarlinConfig,
     "experts_int8": ExpertsInt8Config,
     "neuron_quant": NeuronQuantConfig,
     "ipex": IPEXConfig,