[MoE Refactor] Oracle Select FP8+NVFP4 Kernels In Priority (#32414)
This commit is contained in:
@@ -26,6 +26,7 @@ from vllm.model_executor.layers.fused_moe.config import (
|
||||
FusedMoEConfig,
|
||||
FusedMoEParallelConfig,
|
||||
FusedMoEQuantConfig,
|
||||
RoutingMethodType,
|
||||
)
|
||||
from vllm.utils.import_utils import has_deep_ep, has_deep_gemm, has_pplx
|
||||
|
||||
@@ -574,10 +575,14 @@ def make_modular_kernel(
|
||||
num_experts=config.E,
|
||||
experts_per_token=config.topk,
|
||||
hidden_dim=config.K,
|
||||
intermediate_size_per_partition=config.N,
|
||||
num_local_experts=config.num_local_experts,
|
||||
moe_parallel_config=moe_parallel_config,
|
||||
in_dtype=config.dtype,
|
||||
max_num_tokens=next_power_of_2(config.M),
|
||||
activation="silu",
|
||||
device=vllm_config.device_config.device,
|
||||
routing_method=RoutingMethodType.DeepSeekV3,
|
||||
)
|
||||
|
||||
# make modular kernel
|
||||
|
||||
Reference in New Issue
Block a user