[Kernels] Clean up FusedMoEMethodBase and modular kernel setup. Remove extra arguments from modular kernel methods. (#22035)
Signed-off-by: Bill Nell <bnell@redhat.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
This commit is contained in:
@@ -7,6 +7,7 @@ import torch
|
||||
from packaging import version
|
||||
|
||||
from vllm.model_executor.layers.fused_moe.layer import (FusedMoE,
|
||||
FusedMoEConfig,
|
||||
FusedMoEMethodBase)
|
||||
from vllm.model_executor.layers.linear import (LinearBase, LinearMethodBase,
|
||||
UnquantizedLinearMethod,
|
||||
@@ -132,7 +133,7 @@ class BitsAndBytesConfig(QuantizationConfig):
|
||||
return UnquantizedLinearMethod()
|
||||
return BitsAndBytesLinearMethod(self)
|
||||
elif isinstance(layer, FusedMoE):
|
||||
return BitsAndBytesMoEMethod(self)
|
||||
return BitsAndBytesMoEMethod(self, layer.moe_config)
|
||||
return None
|
||||
|
||||
|
||||
@@ -411,7 +412,12 @@ class BitsAndBytesMoEMethod(FusedMoEMethodBase):
|
||||
quant_config: The BitsAndBytes quantization config.
|
||||
"""
|
||||
|
||||
def __init__(self, quant_config: BitsAndBytesConfig):
|
||||
def __init__(
|
||||
self,
|
||||
quant_config: BitsAndBytesConfig,
|
||||
moe: FusedMoEConfig,
|
||||
):
|
||||
super().__init__(moe)
|
||||
try:
|
||||
import bitsandbytes
|
||||
if version.parse(
|
||||
@@ -422,7 +428,6 @@ class BitsAndBytesMoEMethod(FusedMoEMethodBase):
|
||||
raise ImportError("Please install bitsandbytes>=0.46.1 via "
|
||||
"`pip install bitsandbytes>=0.46.1` to use "
|
||||
"bitsandbytes quantizer.") from err
|
||||
self.topk_indices_dtype = None
|
||||
self.quant_config = quant_config
|
||||
|
||||
def create_weights(
|
||||
@@ -470,6 +475,7 @@ class BitsAndBytesMoEMethod(FusedMoEMethodBase):
|
||||
logical_replica_count: Optional[torch.Tensor] = None,
|
||||
) -> torch.Tensor:
|
||||
from vllm.model_executor.layers.fused_moe import fused_experts
|
||||
assert self.fused_experts is None
|
||||
|
||||
if enable_eplb:
|
||||
raise NotImplementedError(
|
||||
|
||||
Reference in New Issue
Block a user