From dea63512bb9bdf7521d591546c52138d9d79e8ce Mon Sep 17 00:00:00 2001 From: danisereb Date: Thu, 12 Feb 2026 16:09:55 +0200 Subject: [PATCH] Add config file for fused MoE for Nemotron (TP4, B200) (#34411) Signed-off-by: Daniel Serebrenik --- .../E=512,N=672,device_name=NVIDIA_B200.json | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=512,N=672,device_name=NVIDIA_B200.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=512,N=672,device_name=NVIDIA_B200.json b/vllm/model_executor/layers/fused_moe/configs/E=512,N=672,device_name=NVIDIA_B200.json new file mode 100644 index 000000000..ac46a8afb --- /dev/null +++ b/vllm/model_executor/layers/fused_moe/configs/E=512,N=672,device_name=NVIDIA_B200.json @@ -0,0 +1,59 @@ +{ + "triton_version": "3.6.0", + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 4 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 3 + }, + "64": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 4 + }, + "128": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 5 + }, + "256": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 5 + }, + "512": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 5 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 2 + } +}