[Refactor] Replace activation: str with MoEActivation enum (#33843)

Signed-off-by: mgoin <mgoin64@gmail.com> Signed-off-by: Michael Goin <mgoin64@gmail.com>
2026-02-11 20:29:32 -05:00
parent 83b47f67b1
commit ff1f83b056
48 changed files with 474 additions and 282 deletions
--- a/tests/kernels/moe/test_flashinfer.py
+++ b/tests/kernels/moe/test_flashinfer.py
@@ -7,6 +7,7 @@ import torch

 import vllm.model_executor.layers.fused_moe.modular_kernel as mk
 from vllm.config import ParallelConfig, VllmConfig, set_current_vllm_config
+from vllm.model_executor.layers.fused_moe.activation import MoEActivation
 from vllm.model_executor.layers.fused_moe.config import (
    FusedMoEConfig,
    FusedMoEParallelConfig,
@@ -93,9 +94,14 @@ class TestData:

    @staticmethod
    def make_moe_tensors_8bit(
-        m: int, k: int, n: int, e: int, is_trtllm: bool, activation: str = "silu"
+        m: int,
+        k: int,
+        n: int,
+        e: int,
+        is_trtllm: bool,
+        activation: MoEActivation = MoEActivation.SILU,
    ) -> "TestData":
-        is_gated = activation != "relu2_no_mul"
+        is_gated = activation.is_gated

        hidden_states = torch.randn((m, k), device="cuda", dtype=torch.bfloat16) / 10
        w13 = torch.randn(
@@ -194,7 +200,7 @@ def test_flashinfer_per_tensor_moe_fp8_no_graph(
            topk_weights=topk_weights,
            topk_ids=topk_ids,
            inplace=False,
-            activation="silu",
+            activation=MoEActivation.SILU,
            global_num_experts=e,
            expert_map=None,
            apply_router_weight_on_input=True,
@@ -219,21 +225,19 @@ def test_flashinfer_per_tensor_moe_fp8_no_graph(
@pytest.mark.parametrize("m,n,k", MNK_FACTORS)
@pytest.mark.parametrize("e", NUM_EXPERTS)
@pytest.mark.parametrize("topk", TOP_KS)
-@pytest.mark.parametrize("activation", ["silu", "relu2_no_mul"])
+@pytest.mark.parametrize("activation", [MoEActivation.SILU, MoEActivation.RELU2_NO_MUL])
 def test_flashinfer_cutlass_moe_fp8_no_graph(
    m: int,
    n: int,
    k: int,
    e: int,
    topk: int,
-    activation: str,
+    activation: MoEActivation,
    monkeypatch,
    workspace_init,
 ):
    set_random_seed(7)
    monkeypatch.setenv("VLLM_FUSED_MOE_CHUNK_SIZE", "8192")
-    assert activation in ["silu", "relu2_no_mul"]
-    is_act_and_mul = activation == "silu_and_mul"
    with set_current_vllm_config(vllm_config):
        td = TestData.make_moe_tensors_8bit(
            m, k, n, e, is_trtllm=False, activation=activation
@@ -292,7 +296,7 @@ def test_flashinfer_cutlass_moe_fp8_no_graph(
            device="cuda",
            moe_parallel_config=FusedMoEParallelConfig.make_no_parallel(),
            in_dtype=torch.bfloat16,
-            is_act_and_mul=is_act_and_mul,
+            is_act_and_mul=activation.is_gated,
            routing_method=RoutingMethodType.TopK,
        )