[BugFix] Add an env to disable moe chunking to work around compile incompatibility (#19642)
Signed-off-by: Ye (Charlotte) Qi <yeq@meta.com>
This commit is contained in:
committed by
GitHub
parent
e91386cde1
commit
33d51f599e
@@ -225,6 +225,10 @@ class FusedMoEPermuteExpertsUnpermute(ABC):
|
||||
else:
|
||||
raise ValueError(f"Unsupported FusedMoe activation: {activation}")
|
||||
|
||||
+ def enable_chunking(self):
|
||||
+ return envs.VLLM_ENABLE_FUSED_MOE_ACTIVATION_CHUNKING and \
|
||||
+ self.supports_chunking()
|
||||
|
||||
@abstractmethod
|
||||
def apply(
|
||||
self,
|
||||
@@ -400,7 +404,7 @@ class FusedMoEModularKernel(torch.nn.Module):
|
||||
else:
|
||||
_, M, N, K, top_k = _moe_problem_size(a1q, w1, w2, topk_ids)
|
||||
|
||||
- if self.fused_experts.supports_chunking():
|
||||
+ if self.fused_experts.enable_chunking():
|
||||
CHUNK_SIZE = envs.VLLM_FUSED_MOE_CHUNK_SIZE
|
||||
num_chunks = cdiv(M, CHUNK_SIZE)
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user