[torch.compile] Speed up MOE handling in forward_context (#33184)

Signed-off-by: Richard Zou <zou3519@gmail.com>
This commit is contained in:
Richard Zou
2026-01-27 18:17:54 -05:00
committed by GitHub
parent 3a6d5cbefd
commit d9aa39a3bb
4 changed files with 22 additions and 18 deletions

View File

@@ -597,6 +597,10 @@ class CompilationConfig:
Map from layer name to layer objects that need to be accessed outside
model code, e.g., Attention, FusedMOE when dp_size>1."""
# NOTE(review): assuming `field` is `dataclasses.field` (import not visible
# here): `init=False` keeps this out of the generated constructor, and
# `default_factory=list` gives each instance its own initially-empty list.
static_all_moe_layers: list[str] = field(default_factory=list, init=False)
"""The names of all the MOE layers in the model."""
# Attention ops; used for piecewise cudagraphs
# Use PyTorch operator format: "namespace::name"
_attention_ops: ClassVar[list[str]] = [