Remove unused cudagraph_batch_sizes (#35552)

Signed-off-by: Boyuan Feng <boyuan@meta.com>
This commit is contained in:
Boyuan Feng
2026-03-02 14:00:16 -08:00
committed by GitHub
parent fa6a6be519
commit c42dc402c1

View File

@@ -597,15 +597,6 @@ class GPUModelRunner(
self.async_output_copy_stream = torch.cuda.Stream()
self.prepare_inputs_event = torch.Event()
# self.cudagraph_batch_sizes sorts in ascending order.
if (
self.compilation_config.cudagraph_capture_sizes
and self.compilation_config.cudagraph_mode != CUDAGraphMode.NONE
):
self.cudagraph_batch_sizes = sorted(
self.compilation_config.cudagraph_capture_sizes
)
# Cache the device properties.
self._init_device_properties()
@@ -5705,10 +5696,6 @@ class GPUModelRunner(
self.compilation_config.adjust_cudagraph_sizes_for_spec_decode(
self.uniform_decode_query_len, self.parallel_config.tensor_parallel_size
)
capture_sizes = self.compilation_config.cudagraph_capture_sizes
self.cudagraph_batch_sizes = (
capture_sizes if capture_sizes is not None else []
)
# Trigger cudagraph dispatching keys initialization after
# resolved cudagraph mode.