[V0 Deprecation] Remove max_seq_len_to_capture (#25543)
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
@@ -245,19 +245,6 @@ class SnowflakeGteNewModelConfig(VerifyAndUpdateConfig):
 }
 
 
-class GraniteMoeHybridModelConfig(VerifyAndUpdateConfig):
-
-    @staticmethod
-    def verify_and_update_config(vllm_config: "VllmConfig") -> None:
-        config = vllm_config.model_config
-        config.max_seq_len_to_capture = config.max_model_len
-        logger.info(
-            "Setting max_seq_len_to_capture to %d "
-            "to ensure that CUDA graph capture "
-            "covers sequences of length up to max_model_len.",
-            config.max_model_len)
-
-
 class GptOssForCausalLMConfig(VerifyAndUpdateConfig):
 
     @staticmethod
@@ -426,7 +413,6 @@ MODELS_CONFIG_MAP: dict[str, type[VerifyAndUpdateConfig]] = {
     "XLMRobertaModel": JinaRobertaModelConfig,
     "JinaVLForRanking": JinaVLForSequenceClassificationConfig,
     "JambaForSequenceClassification": JambaForSequenceClassificationConfig,
-    "GraniteMoeHybridForCausalLM": GraniteMoeHybridModelConfig,
     "GptOssForCausalLM": GptOssForCausalLMConfig,
     "MambaForCausalLM": MambaModelConfig,
     "Mamba2ForCausalLM": MambaModelConfig,
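For context, a minimal self-contained sketch of the override pattern this diff touches, using stand-in ModelConfig/VllmConfig classes and a hypothetical apply_model_overrides helper rather than real vLLM imports: entries in MODELS_CONFIG_MAP map an architecture name to a VerifyAndUpdateConfig subclass whose verify_and_update_config() hook mutates the model config before use. The removed GraniteMoeHybridModelConfig used that hook only to raise max_seq_len_to_capture to max_model_len, so it is deleted along with the field itself.

# Sketch only: stand-in types, not the real vllm.config classes.
from dataclasses import dataclass


@dataclass
class ModelConfig:  # stand-in for vllm.config.ModelConfig
    architecture: str
    max_model_len: int


@dataclass
class VllmConfig:  # stand-in for vllm.config.VllmConfig
    model_config: ModelConfig


class VerifyAndUpdateConfig:
    @staticmethod
    def verify_and_update_config(vllm_config: VllmConfig) -> None:
        raise NotImplementedError


class GptOssForCausalLMConfig(VerifyAndUpdateConfig):
    @staticmethod
    def verify_and_update_config(vllm_config: VllmConfig) -> None:
        # Hypothetical adjustment standing in for the real hook body:
        # clamp the context length to an assumed model maximum.
        cfg = vllm_config.model_config
        cfg.max_model_len = min(cfg.max_model_len, 131072)


# Architecture name -> config-override class, as in the real map above.
MODELS_CONFIG_MAP: dict[str, type[VerifyAndUpdateConfig]] = {
    "GptOssForCausalLM": GptOssForCausalLMConfig,
}


def apply_model_overrides(vllm_config: VllmConfig) -> None:
    # Hypothetical dispatch helper: architectures without an entry
    # (now including GraniteMoeHybridForCausalLM) need no special handling.
    override_cls = MODELS_CONFIG_MAP.get(vllm_config.model_config.architecture)
    if override_cls is not None:
        override_cls.verify_and_update_config(vllm_config)


if __name__ == "__main__":
    cfg = VllmConfig(ModelConfig("GptOssForCausalLM", max_model_len=200000))
    apply_model_overrides(cfg)
    print(cfg.model_config.max_model_len)  # 131072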