AQLM CUDA support (#3287)

Co-authored-by: mgoin <michael@neuralmagic.com>
This commit is contained in:
James Fleming
2024-04-23 13:59:33 -04:00
committed by GitHub
parent 62b5166bd4
commit 2b7949c1c2
14 changed files with 1592 additions and 11 deletions

View File

@@ -1,5 +1,6 @@
from typing import Type
from vllm.model_executor.layers.quantization.aqlm import AQLMConfig
from vllm.model_executor.layers.quantization.awq import AWQConfig
from vllm.model_executor.layers.quantization.base_config import (
QuantizationConfig)
@@ -9,6 +10,7 @@ from vllm.model_executor.layers.quantization.marlin import MarlinConfig
from vllm.model_executor.layers.quantization.squeezellm import SqueezeLLMConfig
QUANTIZATION_METHODS = {
"aqlm": AQLMConfig,
"awq": AWQConfig,
"fp8": FP8Config,
"gptq": GPTQConfig,