[Quantization][Deprecation] Remove BitBlas (#32683)

Signed-off-by: Robert Shaw <robshaw@redhat.com>
Signed-off-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
Co-authored-by: Robert Shaw <robshaw@redhat.com>
This commit is contained in:
Robert Shaw
2026-01-28 03:06:22 -08:00
committed by GitHub
parent ecb4f82209
commit 247d1a32ea
15 changed files with 2 additions and 2030 deletions

View File

@@ -322,13 +322,11 @@ class PackedColumnParameter(_ColumnvLLMParameter):
packed_factor: int | Fraction,
packed_dim: int,
marlin_tile_size: int | None = None,
bitblas_tile_size: int | None = None,
**kwargs,
):
self._packed_factor = packed_factor
self._packed_dim = packed_dim
self._marlin_tile_size = marlin_tile_size
self._bitblas_tile_size = bitblas_tile_size
super().__init__(**kwargs)
@property
@@ -343,17 +341,12 @@ class PackedColumnParameter(_ColumnvLLMParameter):
def marlin_tile_size(self):
return self._marlin_tile_size
@property
def bitblas_tile_size(self):
return self._bitblas_tile_size
def adjust_shard_indexes_for_packing(self, shard_size, shard_offset):
return _adjust_shard_indexes_for_packing(
shard_size=shard_size,
shard_offset=shard_offset,
packed_factor=self.packed_factor,
marlin_tile_size=self.marlin_tile_size,
bitblas_tile_size=self.bitblas_tile_size,
)
@@ -373,13 +366,11 @@ class PackedvLLMParameter(ModelWeightParameter):
packed_factor: int | Fraction,
packed_dim: int,
marlin_tile_size: int | None = None,
bitblas_tile_size: int | None = None,
**kwargs,
):
self._packed_factor = packed_factor
self._packed_dim = packed_dim
self._marlin_tile_size = marlin_tile_size
self._bitblas_tile_size = bitblas_tile_size
super().__init__(**kwargs)
@property
@@ -394,17 +385,12 @@ class PackedvLLMParameter(ModelWeightParameter):
def marlin_tile_size(self):
return self._marlin_tile_size
@property
def bitblas_tile_size(self):
return self._bitblas_tile_size
def adjust_shard_indexes_for_packing(self, shard_size, shard_offset):
return _adjust_shard_indexes_for_packing(
shard_size=shard_size,
shard_offset=shard_offset,
packed_factor=self.packed_factor,
marlin_tile_size=self.marlin_tile_size,
bitblas_tile_size=self.bitblas_tile_size,
)
@@ -617,12 +603,8 @@ def _adjust_shard_indexes_for_marlin(shard_size, shard_offset, marlin_tile_size)
return shard_size * marlin_tile_size, shard_offset * marlin_tile_size
def _adjust_shard_indexes_for_bitblas(shard_size, shard_offset, bitblas_tile_size):
return shard_size // bitblas_tile_size, shard_offset // bitblas_tile_size
def _adjust_shard_indexes_for_packing(
shard_size, shard_offset, packed_factor, marlin_tile_size, bitblas_tile_size
shard_size, shard_offset, packed_factor, marlin_tile_size
):
shard_size = shard_size // packed_factor
shard_offset = shard_offset // packed_factor
@@ -632,11 +614,5 @@ def _adjust_shard_indexes_for_packing(
shard_offset=shard_offset,
marlin_tile_size=marlin_tile_size,
)
elif bitblas_tile_size is not None:
return _adjust_shard_indexes_for_bitblas(
shard_size=shard_size,
shard_offset=shard_offset,
bitblas_tile_size=bitblas_tile_size,
)
return shard_size, shard_offset