[V0 Deprecation] Remove max_seq_len_to_capture (#25543)

commit 2e19a848d4 (parent 77a7fce1bb)
Author: Woosuk Kwon <woosuk.kwon@berkeley.edu>
Date: 2025-09-24 01:51:39 -07:00
Committed by: GitHub
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>

7 changed files with 2 additions and 48 deletions
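For context on what is going away: the `_verify_cuda_graph` logic deleted in the second hunk below clamped the capture threshold so CUDA graphs never targeted lengths the model could not reach. The following standalone sketch (not vLLM code, not part of this commit) replays that now-removed computation, using the illustrative numbers (448 and 1500) cited in the deleted comment:

# Standalone sketch of the clamp this commit deletes from
# ModelConfig._verify_cuda_graph. The numbers are the ones cited in the
# removed comment; treat them as illustrative only.
max_model_len = 448            # e.g. a short decoder context
max_source_positions = 1500    # e.g. a longer encoder input
max_seq_len_to_capture = 8192  # the removed field's default
is_encoder_decoder = True

effective_max_seq_len = max_model_len
if is_encoder_decoder:
    # Widen the bound so the encoder input is also covered.
    effective_max_seq_len = max(effective_max_seq_len, max_source_positions)

# Never capture graphs for lengths the model cannot actually see.
max_seq_len_to_capture = min(max_seq_len_to_capture, effective_max_seq_len)
print(max_seq_len_to_capture)  # -> 1500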

@@ -177,11 +177,6 @@ class ModelConfig:
     graph and always execute the model in eager mode. If False, we will use
     CUDA graph and eager execution in hybrid for maximal performance and
     flexibility."""
-    max_seq_len_to_capture: int = 8192
-    """Maximum sequence len covered by CUDA graphs. When a sequence has context
-    length larger than this, we fall back to eager mode. Additionally for
-    encoder-decoder models, if the sequence length of the encoder input is
-    larger than this, we fall back to the eager mode."""
     max_logprobs: int = 20
     """Maximum number of log probabilities to return when `logprobs` is
     specified in `SamplingParams`. The default value comes the default for the
@@ -1024,21 +1019,8 @@ class ModelConfig:
         current_platform.verify_quantization(self.quantization)
 
     def _verify_cuda_graph(self) -> None:
-        # The `max_seq_len_to_capture` was incorrectly
-        # based on the encoder's input length (448)
-        # but not the decoder's larger input length (1500).
-        # This change ensures the CUDA Graph captures the correct,
-        # larger sequence length, allowing it to work as intended.
-        effective_max_seq_len = self.max_model_len
-        if self.is_encoder_decoder:
-            effective_max_seq_len = max(
-                effective_max_seq_len,
-                getattr(self.hf_config, "max_source_positions", 0))
-        self.max_seq_len_to_capture = min(self.max_seq_len_to_capture,
-                                          effective_max_seq_len)
         # CUDAGraph capture not supported for encoder-decoder models on ROCm
         unsupported_rocm = self.is_encoder_decoder
         if (unsupported_rocm and not self.enforce_eager
                 and current_platform.is_rocm()):
             logger.warning(
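
Downstream, the removal means `max_seq_len_to_capture` is no longer an accepted engine argument. A minimal usage sketch, assuming the `LLM` entrypoint forwards keyword arguments into the `ModelConfig` fields shown above (the model name is a placeholder):

from vllm import LLM

# V0-era construction: the removed kwarg capped CUDA-graph capture at
# 8192 tokens, with eager fallback for longer sequences. After this
# commit, passing it is rejected as an unknown argument.
# llm = LLM(model="facebook/opt-125m", max_seq_len_to_capture=8192)

# Post-removal: no per-length capture cap remains on ModelConfig;
# `enforce_eager` (documented in the context lines above) is still the
# switch for skipping CUDA graphs entirely.
llm = LLM(model="facebook/opt-125m", enforce_eager=True)

In the V1 engine, CUDA-graph capture sizes are governed by the compilation config rather than a per-sequence-length field on the model config, which is why the knob could be dropped outright rather than migrated (hence the "[V0 Deprecation]" framing in the title).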