From 39f9ea0da4a45e9638937b062f86f03db313a0d8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20Rialland?=
 <36076211+TQCB@users.noreply.github.com>
Date: Fri, 6 Mar 2026 15:15:31 +0100
Subject: [PATCH] [Bugfix] Fix `cudagraph_mode:FULL` dispatch (This does not
 impact `FULL_AND_PIECEWISE` (default)) (#36165)

---
 vllm/v1/cudagraph_dispatcher.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/vllm/v1/cudagraph_dispatcher.py b/vllm/v1/cudagraph_dispatcher.py
index be459cd29..b852808ec 100644
--- a/vllm/v1/cudagraph_dispatcher.py
+++ b/vllm/v1/cudagraph_dispatcher.py
@@ -293,16 +293,14 @@ class CudagraphDispatcher:
                 )
                 effective_num_active_loras = self.vllm_config.lora_config.max_loras + 1
 
+        normalized_uniform = uniform_decode and self.cudagraph_mode.separate_routine()
         batch_desc = self._create_padded_batch_descriptor(
-            num_tokens, uniform_decode, has_lora, effective_num_active_loras
+            num_tokens, normalized_uniform, has_lora, effective_num_active_loras
         )
 
         if CUDAGraphMode.FULL in allowed_modes:
             # check if key exists for full cudagraph
-            # For pure FULL mode, keys are registered with uniform=False.
             batch_desc_to_check = batch_desc
-            if self.cudagraph_mode == CUDAGraphMode.FULL:
-                batch_desc_to_check = replace(batch_desc, uniform=False)
             if batch_desc_to_check in self.cudagraph_keys[CUDAGraphMode.FULL]:
                 return CUDAGraphMode.FULL, batch_desc_to_check