feat: BF16 FlashInfer Fused Cutlass MOE for Hopper and Blackwell Expert Parallel (#25503)

Signed-off-by: Duncan Moss <djm.moss@gmail.com>
This commit is contained in:
Duncan Moss
2025-09-24 15:50:04 -07:00
committed by GitHub
parent fea8006062
commit 6160ba4151
5 changed files with 121 additions and 6 deletions

View File

@@ -598,6 +598,8 @@ class SharedResizableBuffer:
def get(self, shape: tuple[int, ...], device: torch.device,
dtype: torch.dtype):
if shape == () or shape is None:
return None
shape_numel = prod(shape)
if (self.buffer is None or self.buffer.numel() < shape_numel
or self.buffer.device != device or self.buffer.dtype != dtype):