[V0 Deprecation] Remove async_output_proc, preemption mode, delay factor (#25334)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
Woosuk Kwon
2025-09-21 08:52:32 -07:00
committed by GitHub
parent 26e673fe93
commit 0ff8ebb2d7
15 changed files with 12 additions and 210 deletions

View File

@@ -310,16 +310,6 @@ class RocmPlatform(Platform):
device_props = torch.cuda.get_device_properties(device_id)
return device_props.total_memory
@classmethod
def is_async_output_supported(cls, enforce_eager: Optional[bool]) -> bool:
    """Return whether async output processing can be used on this platform.

    Async output processing only pays off when CUDA graphs are in use, so
    it is disabled (with a warning) when ``enforce_eager`` is set on the
    V0 engine; V1 (``envs.VLLM_USE_V1``) is unaffected by this check.
    """
    # Only the V0 eager-mode combination rules async output out.
    supported = not (enforce_eager and not envs.VLLM_USE_V1)
    if not supported:
        logger.warning(
            "To see benefits of async output processing, enable CUDA "
            "graph. Since, enforce-eager is enabled, async output "
            "processor cannot be used")
    return supported
@classmethod
def check_and_update_config(cls, vllm_config: "VllmConfig") -> None:
from vllm.config.compilation import CUDAGraphMode