From d74a306c4b3e7a0b09581af83f82af910a4b758e Mon Sep 17 00:00:00 2001
From: Frederik Gossen
Date: Wed, 8 Apr 2026 12:09:58 -0400
Subject: [PATCH] [Core] Use tuple_return in split_module for tuple-conformant subgraphs (#38752)

Signed-off-by: Frederik Gossen
Co-authored-by: Boyuan Feng
---
 vllm/compilation/backends.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py
index dee7cdde7..2846193e7 100644
--- a/vllm/compilation/backends.py
+++ b/vllm/compilation/backends.py
@@ -31,6 +31,7 @@ from vllm.logging_utils import lazy
 from vllm.platforms import current_platform
 from vllm.tracing import instrument, instrument_manual
 from vllm.utils.import_utils import resolve_obj_by_qualname
+from vllm.utils.torch_utils import is_torch_equal_or_newer
 
 from .compiler_interface import (
     CompilerInterface,
@@ -575,11 +576,14 @@ def split_graph(
     # the semantics of the graph will change when we
     # have mutations in the graph
     with _use_lazy_graph_module(True):
+        has_tuple_return = is_torch_equal_or_newer("2.12.0.dev")
+        tuple_return_kwarg = {"tuple_return": True} if has_tuple_return else {}
         split_gm = torch.fx.passes.split_module.split_module(
             graph,
             None,
             lambda node: node_to_subgraph_id[node],
             keep_original_order=True,
+            **tuple_return_kwarg,
         )
 
     outputs = []