[Quantization][V1] BitsAndBytes support V1 (#15611)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
@@ -682,8 +682,9 @@ class ModelConfig:
 
     def _verify_bnb_config(self) -> None:
         """
-        The current version of bitsandbytes (0.44.0) with 8-bit models does not
+        The current version of bitsandbytes (0.45.3) with 8-bit models does not
         yet support CUDA graph.
+        # TODO Remove this when bitsandbytes supports.
         """
         is_bitsandbytes = self.quantization == "bitsandbytes"
         has_quantization_config = (getattr(self.hf_config,
@@ -698,8 +699,9 @@ class ModelConfig:
                 not self.enforce_eager,
         ]):
             logger.warning(
-                "CUDA graph is not supported on BitAndBytes 8bit yet, "
+                "CUDA graph is not supported on BitsAndBytes 8bit yet, "
                 "fallback to the eager mode.")
+
             self.enforce_eager = True
 
     def _verify_with_expert_parallelism(self) -> None:
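The hunks above show only the edges of _verify_bnb_config. As a reading aid, here is a minimal, self-contained sketch of the fallback pattern the change touches. It assumes the elided middle of the method derives an is_8bit flag from a dict-like HF quantization_config; the ModelConfigSketch class and the SimpleNamespace config are illustrative stand-ins, not vLLM's actual ModelConfig.

import logging
from types import SimpleNamespace

logger = logging.getLogger(__name__)


class ModelConfigSketch:
    """Hypothetical stand-in for the relevant slice of ModelConfig."""

    def __init__(self, quantization, hf_config, enforce_eager=False):
        self.quantization = quantization
        self.hf_config = hf_config
        self.enforce_eager = enforce_eager

    def _verify_bnb_config(self) -> None:
        is_bitsandbytes = self.quantization == "bitsandbytes"
        quant_config = getattr(self.hf_config, "quantization_config", None)
        has_quantization_config = quant_config is not None
        # Assumed shape: a dict-like HF quantization_config marks 8-bit
        # checkpoints with load_in_8bit=True. This part is reconstructed;
        # the diff elides it.
        is_8bit = (quant_config.get("load_in_8bit", False)
                   if has_quantization_config else False)
        if all([
                is_bitsandbytes,
                has_quantization_config,
                is_8bit,
                not self.enforce_eager,
        ]):
            logger.warning(
                "CUDA graph is not supported on BitsAndBytes 8bit yet, "
                "fallback to the eager mode.")
            self.enforce_eager = True


# Demo: an 8-bit BnB checkpoint gets forced into eager mode.
cfg = ModelConfigSketch(
    quantization="bitsandbytes",
    hf_config=SimpleNamespace(quantization_config={"load_in_8bit": True}),
)
cfg._verify_bnb_config()
assert cfg.enforce_eager is True

Flipping enforce_eager to True with a warning, rather than raising, keeps 8-bit BitsAndBytes checkpoints loadable while bitsandbytes lacks CUDA-graph support; that temporary workaround is what the TODO in the docstring tracks.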