[MoE Refactor] Oracle Select FP8+NVFP4 Kernels In Priority (#32414)

2026-01-21 08:22:33 -05:00
parent e14467be43
commit 42135d6898
82 changed files with 2710 additions and 1563 deletions
--- a/tests/kernels/moe/modular_kernel_tools/common.py
+++ b/tests/kernels/moe/modular_kernel_tools/common.py
@@ -26,6 +26,7 @@ from vllm.model_executor.layers.fused_moe.config import (
    FusedMoEConfig,
    FusedMoEParallelConfig,
    FusedMoEQuantConfig,
+    RoutingMethodType,
 )
 from vllm.utils.import_utils import has_deep_ep, has_deep_gemm, has_pplx

@@ -574,10 +575,14 @@ def make_modular_kernel(
        num_experts=config.E,
        experts_per_token=config.topk,
        hidden_dim=config.K,
+        intermediate_size_per_partition=config.N,
        num_local_experts=config.num_local_experts,
        moe_parallel_config=moe_parallel_config,
        in_dtype=config.dtype,
        max_num_tokens=next_power_of_2(config.M),
+        activation="silu",
+        device=vllm_config.device_config.device,
+        routing_method=RoutingMethodType.DeepSeekV3,
    )

    # make modular kernel