[Bugfix] Fix dispatch for cudagraph_mode=FULL (does not affect the default FULL_AND_PIECEWISE mode) (#36165)
This commit is contained in:
@@ -293,16 +293,14 @@ class CudagraphDispatcher:
|
||||
)
|
||||
effective_num_active_loras = self.vllm_config.lora_config.max_loras + 1
|
||||
|
||||
normalized_uniform = uniform_decode and self.cudagraph_mode.separate_routine()
|
||||
batch_desc = self._create_padded_batch_descriptor(
|
||||
num_tokens, uniform_decode, has_lora, effective_num_active_loras
|
||||
num_tokens, normalized_uniform, has_lora, effective_num_active_loras
|
||||
)
|
||||
|
||||
if CUDAGraphMode.FULL in allowed_modes:
|
||||
# check if key exists for full cudagraph
|
||||
# For pure FULL mode, keys are registered with uniform=False.
|
||||
batch_desc_to_check = batch_desc
|
||||
if self.cudagraph_mode == CUDAGraphMode.FULL:
|
||||
batch_desc_to_check = replace(batch_desc, uniform=False)
|
||||
if batch_desc_to_check in self.cudagraph_keys[CUDAGraphMode.FULL]:
|
||||
return CUDAGraphMode.FULL, batch_desc_to_check
|
||||
|
||||
|
||||
Reference in New Issue
Block a user