[Bugfix] Fix cudagraph_mode:FULL dispatch (This does not impact FULL_AND_PIECEWISE (default)) (#36165)

This commit is contained in:
Raphaël Rialland
2026-03-06 15:15:31 +01:00
committed by GitHub
parent e4ae148a78
commit 39f9ea0da4

View File

@@ -293,16 +293,14 @@ class CudagraphDispatcher:
)
effective_num_active_loras = self.vllm_config.lora_config.max_loras + 1
+ normalized_uniform = uniform_decode and self.cudagraph_mode.separate_routine()
batch_desc = self._create_padded_batch_descriptor(
- num_tokens, uniform_decode, has_lora, effective_num_active_loras
+ num_tokens, normalized_uniform, has_lora, effective_num_active_loras
)
if CUDAGraphMode.FULL in allowed_modes:
# check if key exists for full cudagraph
# For pure FULL mode, keys are registered with uniform=False.
batch_desc_to_check = batch_desc
if self.cudagraph_mode == CUDAGraphMode.FULL:
batch_desc_to_check = replace(batch_desc, uniform=False)
if batch_desc_to_check in self.cudagraph_keys[CUDAGraphMode.FULL]:
return CUDAGraphMode.FULL, batch_desc_to_check