[Quantization][Deprecation] Remove BitBlas (#32683)
Signed-off-by: Robert Shaw <robshaw@redhat.com>
Signed-off-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
Co-authored-by: Robert Shaw <robshaw@redhat.com>
This commit is contained in:
@@ -322,13 +322,11 @@ class PackedColumnParameter(_ColumnvLLMParameter):
|
||||
packed_factor: int | Fraction,
|
||||
packed_dim: int,
|
||||
marlin_tile_size: int | None = None,
|
||||
bitblas_tile_size: int | None = None,
|
||||
**kwargs,
|
||||
):
|
||||
self._packed_factor = packed_factor
|
||||
self._packed_dim = packed_dim
|
||||
self._marlin_tile_size = marlin_tile_size
|
||||
self._bitblas_tile_size = bitblas_tile_size
|
||||
super().__init__(**kwargs)
|
||||
|
||||
@property
|
||||
@@ -343,17 +341,12 @@ class PackedColumnParameter(_ColumnvLLMParameter):
|
||||
def marlin_tile_size(self):
|
||||
return self._marlin_tile_size
|
||||
|
||||
@property
|
||||
def bitblas_tile_size(self):
|
||||
return self._bitblas_tile_size
|
||||
|
||||
def adjust_shard_indexes_for_packing(self, shard_size, shard_offset):
|
||||
return _adjust_shard_indexes_for_packing(
|
||||
shard_size=shard_size,
|
||||
shard_offset=shard_offset,
|
||||
packed_factor=self.packed_factor,
|
||||
marlin_tile_size=self.marlin_tile_size,
|
||||
bitblas_tile_size=self.bitblas_tile_size,
|
||||
)
|
||||
|
||||
|
||||
@@ -373,13 +366,11 @@ class PackedvLLMParameter(ModelWeightParameter):
|
||||
packed_factor: int | Fraction,
|
||||
packed_dim: int,
|
||||
marlin_tile_size: int | None = None,
|
||||
bitblas_tile_size: int | None = None,
|
||||
**kwargs,
|
||||
):
|
||||
self._packed_factor = packed_factor
|
||||
self._packed_dim = packed_dim
|
||||
self._marlin_tile_size = marlin_tile_size
|
||||
self._bitblas_tile_size = bitblas_tile_size
|
||||
super().__init__(**kwargs)
|
||||
|
||||
@property
|
||||
@@ -394,17 +385,12 @@ class PackedvLLMParameter(ModelWeightParameter):
|
||||
def marlin_tile_size(self):
|
||||
return self._marlin_tile_size
|
||||
|
||||
@property
|
||||
def bitblas_tile_size(self):
|
||||
return self._bitblas_tile_size
|
||||
|
||||
def adjust_shard_indexes_for_packing(self, shard_size, shard_offset):
|
||||
return _adjust_shard_indexes_for_packing(
|
||||
shard_size=shard_size,
|
||||
shard_offset=shard_offset,
|
||||
packed_factor=self.packed_factor,
|
||||
marlin_tile_size=self.marlin_tile_size,
|
||||
bitblas_tile_size=self.bitblas_tile_size,
|
||||
)
|
||||
|
||||
|
||||
@@ -617,12 +603,8 @@ def _adjust_shard_indexes_for_marlin(shard_size, shard_offset, marlin_tile_size)
|
||||
return shard_size * marlin_tile_size, shard_offset * marlin_tile_size
|
||||
|
||||
|
||||
def _adjust_shard_indexes_for_bitblas(shard_size, shard_offset, bitblas_tile_size):
|
||||
return shard_size // bitblas_tile_size, shard_offset // bitblas_tile_size
|
||||
|
||||
|
||||
def _adjust_shard_indexes_for_packing(
|
||||
shard_size, shard_offset, packed_factor, marlin_tile_size, bitblas_tile_size
|
||||
shard_size, shard_offset, packed_factor, marlin_tile_size
|
||||
):
|
||||
shard_size = shard_size // packed_factor
|
||||
shard_offset = shard_offset // packed_factor
|
||||
@@ -632,11 +614,5 @@ def _adjust_shard_indexes_for_packing(
|
||||
shard_offset=shard_offset,
|
||||
marlin_tile_size=marlin_tile_size,
|
||||
)
|
||||
elif bitblas_tile_size is not None:
|
||||
return _adjust_shard_indexes_for_bitblas(
|
||||
shard_size=shard_size,
|
||||
shard_offset=shard_offset,
|
||||
bitblas_tile_size=bitblas_tile_size,
|
||||
)
|
||||
|
||||
return shard_size, shard_offset
|
||||
|
||||
Reference in New Issue
Block a user