[Quantization][Deprecation] Remove BitBlas (#32683)
Signed-off-by: Robert Shaw <robshaw@redhat.com>
Signed-off-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
Co-authored-by: Robert Shaw <robshaw@redhat.com>
This commit is contained in:
@@ -41,8 +41,6 @@ WEIGHT_LOADER_V2_SUPPORTED = [
|
||||
"UnquantizedLinearMethod",
|
||||
"CompressedTensorsLinearMethod",
|
||||
"CompressedTensorsLinearTransformMethod",
|
||||
"BitBLASLinearMethod",
|
||||
"GPTQBitBLASLinearMethod",
|
||||
"AWQMarlinLinearMethod",
|
||||
"AWQLinearMethod",
|
||||
"GPTQMarlinLinearMethod",
|
||||
@@ -63,14 +61,6 @@ WEIGHT_LOADER_V2_SUPPORTED = [
|
||||
]
|
||||
|
||||
|
||||
def adjust_bitblas_shard(param, shard_size, shard_offset):
    """Rescale a shard's size and offset by the parameter's BitBLAS tile size.

    Reads the optional ``bitblas_tile_size`` attribute from ``param``. When
    the attribute is missing or ``None`` the inputs are returned unchanged;
    otherwise both values are floor-divided by the tile size.

    Args:
        param: Object that may carry a ``bitblas_tile_size`` attribute.
        shard_size: Size of the shard before adjustment.
        shard_offset: Offset of the shard before adjustment.

    Returns:
        A ``(shard_size, shard_offset)`` tuple, adjusted when a tile size
        is set on ``param``.
    """
    tile = getattr(param, "bitblas_tile_size", None)
    # Guard clause: no tile size on the parameter means nothing to adjust.
    if tile is None:
        return shard_size, shard_offset
    return shard_size // tile, shard_offset // tile
|
||||
|
||||
|
||||
def adjust_marlin_shard(param, shard_size, shard_offset):
|
||||
marlin_tile_size = getattr(param, "marlin_tile_size", None)
|
||||
if marlin_tile_size is None:
|
||||
@@ -748,10 +738,6 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
|
||||
param, shard_size, shard_offset
|
||||
)
|
||||
|
||||
shard_size, shard_offset = adjust_bitblas_shard(
|
||||
param, shard_size, shard_offset
|
||||
)
|
||||
|
||||
if use_bitsandbytes_4bit:
|
||||
index = list(itertools.accumulate([0] + self.output_sizes))
|
||||
orig_offsets = {
|
||||
@@ -794,9 +780,6 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
|
||||
shard_size, shard_offset = adjust_marlin_shard(
|
||||
param, shard_size, shard_offset
|
||||
)
|
||||
shard_size, shard_offset = adjust_bitblas_shard(
|
||||
param, shard_size, shard_offset
|
||||
)
|
||||
|
||||
use_bitsandbytes_4bit = getattr(param, "use_bitsandbytes_4bit", False)
|
||||
is_sharded_weight = getattr(param, "is_sharded_weight", False)
|
||||
|
||||
Reference in New Issue
Block a user