feat: BF16 FlashInfer Fused Cutlass MOE for Hopper and Blackwell Expert Parallel (#25503)

Signed-off-by: Duncan Moss <djm.moss@gmail.com>
2025-09-24 15:50:04 -07:00
parent fea8006062
commit 6160ba4151
5 changed files with 121 additions and 6 deletions
--- a/vllm/model_executor/layers/fused_moe/modular_kernel.py
+++ b/vllm/model_executor/layers/fused_moe/modular_kernel.py
@@ -598,6 +598,8 @@ class SharedResizableBuffer:

    def get(self, shape: tuple[int, ...], device: torch.device,
            dtype: torch.dtype):
+        if shape == () or shape is None:
+            return None
        shape_numel = prod(shape)
        if (self.buffer is None or self.buffer.numel() < shape_numel
                or self.buffer.device != device or self.buffer.dtype != dtype):