[BugFix] Add an env to disable moe chunking to work around compile incompatibility (#19642)

Signed-off-by: Ye (Charlotte) Qi <yeq@meta.com>
This commit is contained in:
Ye (Charlotte) Qi
2025-06-22 15:17:49 -07:00
committed by GitHub
parent e91386cde1
commit 33d51f599e
2 changed files with 12 additions and 1 deletion

View File

@@ -225,6 +225,10 @@ class FusedMoEPermuteExpertsUnpermute(ABC):
else:
raise ValueError(f"Unsupported FusedMoe activation: {activation}")
def enable_chunking(self):
return envs.VLLM_ENABLE_FUSED_MOE_ACTIVATION_CHUNKING and \
self.supports_chunking()
@abstractmethod
def apply(
self,
@@ -400,7 +404,7 @@ class FusedMoEModularKernel(torch.nn.Module):
else:
_, M, N, K, top_k = _moe_problem_size(a1q, w1, w2, topk_ids)
if self.fused_experts.supports_chunking():
if self.fused_experts.enable_chunking():
CHUNK_SIZE = envs.VLLM_FUSED_MOE_CHUNK_SIZE
num_chunks = cdiv(M, CHUNK_SIZE)
else: