[Core] Enable async scheduling by default (#27614)

Signed-off-by: Nick Hill <nickhill123@gmail.com>
2025-12-29 12:20:55 -08:00
parent b12cb38398
commit c2ff33cc8c
2 changed files with 31 additions and 14 deletions
--- a/vllm/config/scheduler.py
+++ b/vllm/config/scheduler.py
@@ -130,11 +130,12 @@ class SchedulerConfig:
    and starting configuration.
    """

-    async_scheduling: bool = False
-    """If set to True, perform async scheduling. This helps to avoid gaps in
-    GPU utilization, leading to better latency and throughput.
-    Async scheduling is currently not supported with some features such as
-    speculative decoding and pipeline parallelism.
+    async_scheduling: bool = Field(default=None)
+    """If set to False, disable async scheduling. Async scheduling helps to
+    avoid gaps in GPU utilization, leading to better latency and throughput.
+    If left unset, it is enabled by default, but it is automatically disabled
+    when an incompatible option is configured, such as speculative decoding
+    or pipeline parallelism (pipeline_parallel_size > 1).
    """

    stream_interval: int = Field(default=1, ge=1)
--- a/vllm/config/vllm.py
+++ b/vllm/config/vllm.py
@@ -552,7 +552,7 @@ class VllmConfig:
                if self.speculative_config.method not in get_args(EagleModelTypes):
                    raise ValueError(
                        "Currently, async scheduling is only supported "
-                        "with EAGLE/MTP kind of speculative decoding"
+                        "with EAGLE/MTP kind of speculative decoding."
                    )
                if self.speculative_config.disable_padded_drafter_batch:
                    raise ValueError(
@@ -570,16 +570,27 @@ class VllmConfig:
                )
        elif self.scheduler_config.async_scheduling is None:
            # Enable async scheduling unless there is an incompatible option.
-            # NOTE: we won't reach here until async scheduling is enabled by default.
-            if (
-                self.parallel_config.pipeline_parallel_size > 1
-                or self.speculative_config is not None
-            ):
+            if self.parallel_config.pipeline_parallel_size > 1:
                logger.warning(
-                    "Async scheduling is not yet supported with speculative decoding "
-                    " or pipeline_parallel_size > 1 and will be disabled."
+                    "Async scheduling is not yet supported with "
+                    "pipeline_parallel_size > 1 and will be disabled."
                )
                self.scheduler_config.async_scheduling = False
+            elif self.speculative_config is not None:
+                if self.speculative_config.method not in get_args(EagleModelTypes):
+                    logger.warning(
+                        "Async scheduling not supported with %s-based "
+                        "speculative decoding and will be disabled.",
+                        self.speculative_config.method,
+                    )
+                else:
+                    logger.warning(
+                        "Async scheduling will be disabled because some features do "
+                        "not currently work in conjunction with speculative decoding. "
+                        "To use async scheduling with spec decoding anyway, "
+                        "enable it explicitly via async_scheduling=True."
+                    )
+                self.scheduler_config.async_scheduling = False
            elif not executor_supports_async_sched:
                logger.warning(
                    "Async scheduling will be disabled because it is not supported "
@@ -595,11 +606,16 @@ class VllmConfig:
            self.scheduler_config.async_scheduling
            and not self.parallel_config.disable_nccl_for_dp_synchronization
        ):
-            logger.info(
+            logger.info_once(
                "Disabling NCCL for DP synchronization when using async scheduling."
            )
            self.parallel_config.disable_nccl_for_dp_synchronization = True

+        logger.info_once(
+            "Asynchronous scheduling is %s.",
+            "enabled" if self.scheduler_config.async_scheduling else "disabled",
+        )
+
        from vllm.platforms import current_platform

        if (