[compile] Enable sequence parallelism for full cuda graph without specifying compile sizes (#26681)
Signed-off-by: angelayi <yiangela7@gmail.com>
This commit is contained in:
@@ -71,9 +71,11 @@ class PostGradPassManager(CustomGraphPass):
|
||||
|
||||
shape = get_pass_context().runtime_shape
|
||||
for pass_ in self.passes:
|
||||
if pass_.is_applicable_for_shape(shape):
|
||||
if pass_.is_applicable(shape):
|
||||
pass_(graph)
|
||||
VllmInductorPass.dump_prefix += 1
|
||||
else:
|
||||
logger.debug("Skipping %s with shape %s", pass_, shape)
|
||||
|
||||
# post-cleanup goes before fix_functionalization
|
||||
# because it requires a functional graph
|
||||
|
||||
Reference in New Issue
Block a user