[Bugfix] Fix _CPU_MOE_ACT AssertionError when vLLM config not set (#32777)
Signed-off-by: Karan Bansal <karanb192@gmail.com>
@@ -6,7 +6,7 @@ import torch
 
 from tests.kernels.allclose_default import get_default_atol, get_default_rtol
 from vllm._custom_ops import cpu_fused_moe, cpu_prepack_moe_weight
-from vllm.model_executor.layers.fused_moe.cpu_fused_moe import _CPU_MOE_ACT
+from vllm.model_executor.layers.fused_moe.cpu_fused_moe import _CPU_MOE_ACT_FN
 from vllm.platforms import current_platform
 from vllm.utils.torch_utils import set_random_seed
 
@@ -68,12 +68,7 @@ def ref_fused_moe(
             tokens_for_this_expert, curr_w13, curr_w13_bias
         )
         # Note: to simulate the kernel implementation
-        gate_up = (
-            _CPU_MOE_ACT[activation]
-            .forward_native(gate_up)
-            .to(dtype=input.dtype)
-            .float()
-        )
+        gate_up = _CPU_MOE_ACT_FN[activation](gate_up).to(dtype=input.dtype).float()
         expert_out = torch.nn.functional.linear(gate_up, curr_w2, curr_w2_bias)
 
         outputs.append(expert_out)
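Why the switch sidesteps the crash, as a minimal self-contained sketch: this assumes _CPU_MOE_ACT mapped activation names to CustomOp-style layer instances whose construction asserts that a vLLM config is set, while _CPU_MOE_ACT_FN maps them to plain callables. The class and function bodies below are illustrative stand-ins, not vLLM's actual implementation.

# Minimal sketch of the failure mode; the names mirror the diff, but the
# bodies below are assumptions, not vLLM's actual code.
import torch
import torch.nn.functional as F

_VLLM_CONFIG_SET = False  # stand-in for vLLM's global config state


class SiluAndMul:
    """Hypothetical CustomOp-style activation layer."""

    def __init__(self):
        # Constructing a CustomOp consults the global vLLM config, so
        # building the layer in a bare test process trips an assertion.
        assert _VLLM_CONFIG_SET, "vLLM config must be set before CustomOp init"

    def forward_native(self, x: torch.Tensor) -> torch.Tensor:
        gate, up = x.chunk(2, dim=-1)
        return F.silu(gate) * up


def silu_and_mul_fn(x: torch.Tensor) -> torch.Tensor:
    # Plain function: no construction-time config check to fail.
    gate, up = x.chunk(2, dim=-1)
    return F.silu(gate) * up


# Old-style table: building the instance raises AssertionError with no config.
# _CPU_MOE_ACT = {"silu": SiluAndMul()}  # AssertionError at import time

# New-style table: plain callables are safe to call from a bare test.
_CPU_MOE_ACT_FN = {"silu": silu_and_mul_fn}

x = torch.randn(4, 16)
out = _CPU_MOE_ACT_FN["silu"](x)  # runs without any vLLM config set

Under these assumptions, the functional table means the test's reference path never constructs a layer object, so it can run without entering a vLLM config context.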