[Feature][Kernel] Support bitsandbytes quantization and QLoRA (#4776)

This commit is contained in:
chenqianfzh
2024-06-01 13:51:10 -07:00
committed by GitHub
parent 37464a0f74
commit b9c0605a8e
11 changed files with 752 additions and 8 deletions

View File

@@ -4,6 +4,8 @@ from vllm.model_executor.layers.quantization.aqlm import AQLMConfig
from vllm.model_executor.layers.quantization.awq import AWQConfig
from vllm.model_executor.layers.quantization.base_config import (
QuantizationConfig)
from vllm.model_executor.layers.quantization.bitsandbytes import (
BitsAndBytesConfig)
from vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors import ( # noqa: E501
CompressedTensorsConfig)
from vllm.model_executor.layers.quantization.deepspeedfp import (
@@ -30,6 +32,7 @@ QUANTIZATION_METHODS: Dict[str, Type[QuantizationConfig]] = {
"gptq": GPTQConfig,
"squeezellm": SqueezeLLMConfig,
"sparseml": CompressedTensorsConfig,
"bitsandbytes": BitsAndBytesConfig,
}