[Perf] Support topk softmax fused kernel for broader num_experts (#22211)

Signed-off-by: Shixian Cui <shixian@amazon.com>
Co-authored-by: Shixian Cui <shixian@amazon.com>
This commit is contained in:
shixianc
2025-08-12 21:34:47 -07:00
committed by GitHub
parent 77a6bf07ae
commit 4c558cf62e
2 changed files with 46 additions and 33 deletions

View File

@@ -36,7 +36,7 @@ from vllm.model_executor.models.mixtral import MixtralMoE
from vllm.platforms import current_platform
from vllm.scalar_type import ScalarType, scalar_types
-NUM_EXPERTS = [8, 64]
+NUM_EXPERTS = [8, 64, 192]
EP_SIZE = [1, 4]
TOP_KS = [2, 6]