[Misc] Remove pad_for_cudagraphs from config (#30143)

Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
Co-authored-by: Matthew Bonanni <mbonanni@redhat.com>
2026-01-20 13:05:48 -07:00
parent 86c69dc54c
commit 2261340806
9 changed files with 157 additions and 136 deletions
--- a/vllm/config/compilation.py
+++ b/vllm/config/compilation.py
@@ -581,15 +581,6 @@ class CompilationConfig:
    local_cache_dir: str = field(default=None, init=False)  # type: ignore
    """local cache dir for each rank"""

-    bs_to_padded_graph_size: list[int] = field(
-        default=None,  # type: ignore
-        init=False,
-    )
-    """optimization:
-    Intuitively, bs_to_padded_graph_size should be dict[int, int].
-    since we know all keys are in a range [0, max_cudagraph_capture_size],
-    we can optimize it to list[int] for better lookup performance."""
-
    # keep track of enabled and disabled custom ops
    enabled_custom_ops: Counter[str] = field(default_factory=Counter, init=False)
    """custom ops that are enabled"""
@@ -639,7 +630,6 @@ class CompilationConfig:
            "debug_dump_path",
            "cache_dir",
            "local_cache_dir",
-            "bs_to_padded_graph_size",
            "traced_files",
            "compilation_time",
            "static_forward_context",
@@ -661,7 +651,6 @@ class CompilationConfig:
            "enabled_custom_ops": True,
            "disabled_custom_ops": True,
            "compilation_time": True,
-            "bs_to_padded_graph_size": True,
            "traced_files": True,
            "inductor_compile_config": {
                "post_grad_custom_post_pass": True,
@@ -882,7 +871,6 @@ class CompilationConfig:
        """To complete the initialization after cudagraph related
        configs are set. This includes:
        - initialize compile_sizes
-        - pre-compute the mapping bs_to_padded_graph_size
        """

        computed_compile_sizes = []
@@ -906,23 +894,6 @@ class CompilationConfig:
        if self.cudagraph_capture_sizes:
            assert self.cudagraph_capture_sizes[-1] == self.max_cudagraph_capture_size

-        # May get recomputed in the model runner if adjustment is needed for spec-decode
-        self.compute_bs_to_padded_graph_size()
-
-        # Validate that compile_sizes won't be changed by padding.
-        # Only validate when cudagraphs are actually being used.
-        if self.compile_sizes and self.cudagraph_mode != CUDAGraphMode.NONE:
-            for size in self.compile_sizes:
-                if size <= self.max_cudagraph_capture_size:
-                    padded = self.bs_to_padded_graph_size[size]
-                    if padded != size:
-                        raise ValueError(
-                            f"compile_sizes contains {size} which would be "
-                            f"padded to {padded}. All compile_sizes must be "
-                            "values that won't be changed by cudagraph padding. "
-                            "Use values from cudagraph_capture_sizes."
-                        )
-
    def set_splitting_ops_for_v1(
        self, all2all_backend: str, data_parallel_size: int = 1
    ):
@@ -1134,24 +1105,6 @@ class CompilationConfig:
        self.max_cudagraph_capture_size = rounded_sizes[-1]
        self.cudagraph_capture_sizes = rounded_sizes

-        # Recompute after adjusting the cudagraph sizes
-        self.compute_bs_to_padded_graph_size()
-
-    def compute_bs_to_padded_graph_size(self):
-        # pre-compute the mapping from batch size to padded graph size
-        self.bs_to_padded_graph_size = [
-            0 for i in range(self.max_cudagraph_capture_size + 1)
-        ]
-        for end, start in zip(
-            self.cudagraph_capture_sizes + [self.max_cudagraph_capture_size + 1],
-            [0] + self.cudagraph_capture_sizes,
-        ):
-            for bs in range(start, end):
-                if bs == start:
-                    self.bs_to_padded_graph_size[bs] = start
-                else:
-                    self.bs_to_padded_graph_size[bs] = end
-
    def get_compile_ranges(self) -> list[Range]:
        """Get the compile ranges for the compilation config."""
        if self.compile_ranges_split_points is None: