[Quantization][Deprecation] Remove BitBlas (#32683)

Signed-off-by: Robert Shaw <robshaw@redhat.com>
Signed-off-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
Co-authored-by: Robert Shaw <robshaw@redhat.com>
This commit is contained in:
Robert Shaw
2026-01-28 03:06:22 -08:00
committed by GitHub
parent ecb4f82209
commit 247d1a32ea
15 changed files with 2 additions and 2030 deletions

View File

@@ -41,8 +41,6 @@ WEIGHT_LOADER_V2_SUPPORTED = [
"UnquantizedLinearMethod",
"CompressedTensorsLinearMethod",
"CompressedTensorsLinearTransformMethod",
"BitBLASLinearMethod",
"GPTQBitBLASLinearMethod",
"AWQMarlinLinearMethod",
"AWQLinearMethod",
"GPTQMarlinLinearMethod",
@@ -63,14 +61,6 @@ WEIGHT_LOADER_V2_SUPPORTED = [
]
def adjust_bitblas_shard(param, shard_size, shard_offset):
    """Scale a shard's size and offset by the parameter's BitBLAS tile size.

    Reads the optional ``bitblas_tile_size`` attribute from ``param``.
    When it is missing or ``None`` the inputs are returned unchanged;
    otherwise both values are floor-divided by the tile size.

    Returns:
        A ``(shard_size, shard_offset)`` tuple.
    """
    tile = getattr(param, "bitblas_tile_size", None)
    # Guard clause: parameters without a tile size need no adjustment.
    if tile is None:
        return shard_size, shard_offset
    return (shard_size // tile, shard_offset // tile)
def adjust_marlin_shard(param, shard_size, shard_offset):
marlin_tile_size = getattr(param, "marlin_tile_size", None)
if marlin_tile_size is None:
@@ -748,10 +738,6 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
param, shard_size, shard_offset
)
shard_size, shard_offset = adjust_bitblas_shard(
param, shard_size, shard_offset
)
if use_bitsandbytes_4bit:
index = list(itertools.accumulate([0] + self.output_sizes))
orig_offsets = {
@@ -794,9 +780,6 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
shard_size, shard_offset = adjust_marlin_shard(
param, shard_size, shard_offset
)
shard_size, shard_offset = adjust_bitblas_shard(
param, shard_size, shard_offset
)
use_bitsandbytes_4bit = getattr(param, "use_bitsandbytes_4bit", False)
is_sharded_weight = getattr(param, "is_sharded_weight", False)