[Feature] Add async tensor parallelism for scaled mm (#20155)

Signed-off-by: cascade812 <cascade812@outlook.com>
This commit is contained in:
cascade
2025-07-30 14:23:41 -07:00
committed by GitHub
parent f12d9256b3
commit 287f527f54
3 changed files with 381 additions and 8 deletions

View File

@@ -477,6 +477,6 @@ class SequenceParallelismPass(VllmInductorPass):
self.begin()
self.dump_graph(graph, "before_sequence_parallelism_pass")
count = self.patterns.apply(graph)
-logger.debug("Replaced %s patterns", count)
+logger.debug("Replaced %s patterns with sequence parallelism", count)
self.dump_graph(graph, "after_sequence_parallelism_pass")
self.end_and_log()