[compile] Enable sequence parallelism for full cuda graph without specifying compile sizes (#26681)

Signed-off-by: angelayi <yiangela7@gmail.com>
This commit is contained in:
Angela Yi
2025-10-13 18:15:34 -07:00
committed by GitHub
parent 3e051bda82
commit b59dd19b55
5 changed files with 34 additions and 5 deletions

View File

@@ -71,9 +71,11 @@ class PostGradPassManager(CustomGraphPass):
         shape = get_pass_context().runtime_shape
         for pass_ in self.passes:
-            if pass_.is_applicable_for_shape(shape):
+            if pass_.is_applicable(shape):
                 pass_(graph)
                 VllmInductorPass.dump_prefix += 1
+            else:
+                logger.debug("Skipping %s with shape %s", pass_, shape)
         # post-cleanup goes before fix_functionalization
         # because it requires a functional graph