[Chore] Rename SchedulerConfig.chunked_prefill_enabled (#28735)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2025-11-15 02:39:57 +08:00
parent 67187554dd
commit e2741f6cbc
9 changed files with 21 additions and 19 deletions
--- a/vllm/config/scheduler.py
+++ b/vllm/config/scheduler.py
@@ -8,7 +8,7 @@ from typing import TYPE_CHECKING, Any, ClassVar, Literal, cast

 from pydantic import Field, field_validator, model_validator
 from pydantic.dataclasses import dataclass
-from typing_extensions import Self
+from typing_extensions import Self, deprecated

 from vllm.config.utils import config
 from vllm.logger import init_logger
@@ -233,6 +233,11 @@ class SchedulerConfig:
            )

    @property
+    @deprecated(
+        "`SchedulerConfig.chunked_prefill_enabled` has been renamed to "
+        "`SchedulerConfig.enable_chunked_prefill`. "
+        "The old name will be removed in v0.12."
+    )
    def chunked_prefill_enabled(self) -> bool:
        return self.enable_chunked_prefill

@@ -244,7 +249,7 @@ class SchedulerConfig:
    def _verify_args(self) -> Self:
        if (
            self.max_num_batched_tokens < self.max_model_len
-            and not self.chunked_prefill_enabled
+            and not self.enable_chunked_prefill
        ):
            raise ValueError(
                f"max_num_batched_tokens ({self.max_num_batched_tokens}) is "
@@ -271,7 +276,7 @@ class SchedulerConfig:
            )

        if self.max_num_partial_prefills > 1:
-            if not self.chunked_prefill_enabled:
+            if not self.enable_chunked_prefill:
                raise ValueError(
                    "Chunked prefill must be enabled to set "
                    "max_num_partial_prefills > 1."
--- a/vllm/config/vllm.py
+++ b/vllm/config/vllm.py
@@ -411,7 +411,7 @@ class VllmConfig:

        if (
            self.model_config is not None
-            and self.scheduler_config.chunked_prefill_enabled
+            and self.scheduler_config.enable_chunked_prefill
            and self.model_config.dtype == torch.float32
            and current_platform.get_device_capability() == (7, 5)
        ):
@@ -584,7 +584,7 @@ class VllmConfig:
        ):
            for reason in disable_chunked_prefill_reasons:
                logger.info(reason)
-            self.scheduler_config.chunked_prefill_enabled = False
+            self.scheduler_config.enable_chunked_prefill = False
            self.scheduler_config.long_prefill_token_threshold = 0

            if self.cache_config is not None:
@@ -1026,7 +1026,7 @@ class VllmConfig:
            f"seed={self.model_config.seed}, "
            f"served_model_name={self.model_config.served_model_name}, "
            f"enable_prefix_caching={self.cache_config.enable_prefix_caching}, "
-            f"chunked_prefill_enabled={self.scheduler_config.chunked_prefill_enabled}, "  # noqa
+            f"enable_chunked_prefill={self.scheduler_config.enable_chunked_prefill}, "  # noqa
            f"pooler_config={self.model_config.pooler_config!r}, "
            f"compilation_config={self.compilation_config!r}"
        )
--- a/vllm/platforms/cpu.py
+++ b/vllm/platforms/cpu.py
@@ -192,7 +192,7 @@ class CpuPlatform(Platform):

        scheduler_config = vllm_config.scheduler_config
        if (
-            scheduler_config.chunked_prefill_enabled
+            scheduler_config.enable_chunked_prefill
            or cache_config.enable_prefix_caching
        ) and cache_config.cache_dtype != "auto":
            raise RuntimeError(
--- a/vllm/v1/core/sched/scheduler.py
+++ b/vllm/v1/core/sched/scheduler.py
@@ -497,7 +497,7 @@ class Scheduler(SchedulerInterface):
                    # chunked prefill has to be enabled explicitly to allow
                    # pooling requests to be chunked
                    if (
-                        not self.scheduler_config.chunked_prefill_enabled
+                        not self.scheduler_config.enable_chunked_prefill
                        and num_new_tokens > token_budget
                    ):
                        self.waiting.pop_request()
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@@ -124,7 +124,7 @@ class EngineCore:
            # Encoder models without KV cache don't support
            # chunked prefill. But do SSM models?
            logger.info("Disabling chunked prefill for model without KVCache")
-            vllm_config.scheduler_config.chunked_prefill_enabled = False
+            vllm_config.scheduler_config.enable_chunked_prefill = False

        scheduler_block_size = (
            vllm_config.cache_config.block_size
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -2031,7 +2031,7 @@ class GPUModelRunner(

        supported_tasks = list(model.pooler.get_supported_tasks())

-        if self.scheduler_config.chunked_prefill_enabled:
+        if self.scheduler_config.enable_chunked_prefill:
            if "token_embed" in supported_tasks:
                supported_tasks.remove("token_embed")
            if "token_classify" in supported_tasks:
@@ -3825,7 +3825,7 @@ class GPUModelRunner(
        supported_pooling_tasks = self.get_supported_pooling_tasks()

        if not supported_pooling_tasks:
-            if self.scheduler_config.chunked_prefill_enabled:
+            if self.scheduler_config.enable_chunked_prefill:
                raise RuntimeError(
                    f"Model {self.model_config.model} does not support "
                    "any pooling tasks with chunked prefill enabled. "