[Quantization][V1] BitsAndBytes support V1 (#15611)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
Jee Jee Li
2025-03-28 10:12:47 +08:00
committed by GitHub
parent bd45912b99
commit 726efc6a32
7 changed files with 52 additions and 24 deletions


@@ -682,8 +682,9 @@ class ModelConfig:
     def _verify_bnb_config(self) -> None:
         """
-        The current version of bitsandbytes (0.44.0) with 8-bit models does not
+        The current version of bitsandbytes (0.45.3) with 8-bit models does not
         yet support CUDA graph.
+        # TODO Remove this when bitsandbytes supports.
         """
         is_bitsandbytes = self.quantization == "bitsandbytes"
         has_quantization_config = (getattr(self.hf_config,
@@ -698,8 +699,9 @@ class ModelConfig:
             not self.enforce_eager,
         ]):
             logger.warning(
-                "CUDA graph is not supported on BitAndBytes 8bit yet, "
+                "CUDA graph is not supported on BitsAndBytes 8bit yet, "
                 "fallback to the eager mode.")
             self.enforce_eager = True
 
     def _verify_with_expert_parallelism(self) -> None:
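
For reference, the patched check amounts to roughly the following standalone sketch. Only the lines visible in the hunks above come from the actual vLLM ModelConfig; the ModelConfigSketch wrapper, the load_in_8bit key lookup, and the toy _Cfg config object are assumptions made for illustration, not the real implementation.

import logging

logger = logging.getLogger(__name__)


class ModelConfigSketch:
    """Toy stand-in for vLLM's ModelConfig; illustration only."""

    def __init__(self, hf_config, quantization=None, enforce_eager=False):
        self.hf_config = hf_config          # e.g. a transformers PretrainedConfig
        self.quantization = quantization    # e.g. "bitsandbytes"
        self.enforce_eager = enforce_eager

    def _verify_bnb_config(self) -> None:
        """Fall back to eager mode for bitsandbytes 8-bit models,
        since CUDA graph is not supported there yet."""
        is_bitsandbytes = self.quantization == "bitsandbytes"
        has_quantization_config = getattr(
            self.hf_config, "quantization_config", None) is not None
        # Assumption: an 8-bit checkpoint advertises load_in_8bit in its
        # quantization_config dict (the exact key is not shown in the hunks).
        is_8bit = (self.hf_config.quantization_config.get("load_in_8bit", False)
                   if has_quantization_config else False)
        if all([
                is_bitsandbytes,
                has_quantization_config,
                is_8bit,
                not self.enforce_eager,
        ]):
            logger.warning(
                "CUDA graph is not supported on BitsAndBytes 8bit yet, "
                "fallback to the eager mode.")
            self.enforce_eager = True


# Hypothetical usage: an 8-bit bitsandbytes config triggers the eager fallback.
class _Cfg:
    quantization_config = {"load_in_8bit": True}


cfg = ModelConfigSketch(hf_config=_Cfg(), quantization="bitsandbytes")
cfg._verify_bnb_config()
assert cfg.enforce_eager

The design choice here is to degrade gracefully rather than fail: when an unsupported combination (bitsandbytes 8-bit plus CUDA graph) is detected, the config silently switches itself to eager mode and logs a warning instead of raising.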