From d74a306c4b3e7a0b09581af83f82af910a4b758e Mon Sep 17 00:00:00 2001
From: Frederik Gossen <frgossen@meta.com>
Date: Wed, 8 Apr 2026 12:09:58 -0400
Subject: [PATCH] [Core] Use tuple_return in split_module for tuple-conformant
 subgraphs (#38752)

Signed-off-by: Frederik Gossen <frgossen@meta.com>
Co-authored-by: Boyuan Feng <boyuan@meta.com>
---
 vllm/compilation/backends.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py
index dee7cdde7..2846193e7 100644
--- a/vllm/compilation/backends.py
+++ b/vllm/compilation/backends.py
@@ -31,6 +31,7 @@ from vllm.logging_utils import lazy
 from vllm.platforms import current_platform
 from vllm.tracing import instrument, instrument_manual
 from vllm.utils.import_utils import resolve_obj_by_qualname
+from vllm.utils.torch_utils import is_torch_equal_or_newer
 
 from .compiler_interface import (
     CompilerInterface,
@@ -575,11 +576,14 @@ def split_graph(
     # the semantics of the graph will change when we
     # have mutations in the graph
     with _use_lazy_graph_module(True):
+        has_tuple_return = is_torch_equal_or_newer("2.12.0.dev")
+        tuple_return_kwarg = {"tuple_return": True} if has_tuple_return else {}
         split_gm = torch.fx.passes.split_module.split_module(
             graph,
             None,
             lambda node: node_to_subgraph_id[node],
             keep_original_order=True,
+            **tuple_return_kwarg,
         )
 
     outputs = []