[V0 Deprecation] Remove async_output_proc, preemption mode, delay factor (#25334)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
Woosuk Kwon
2025-09-21 08:52:32 -07:00
committed by GitHub
parent 26e673fe93
commit 0ff8ebb2d7
15 changed files with 12 additions and 210 deletions

View File

@@ -310,16 +310,6 @@ class RocmPlatform(Platform):
device_props = torch.cuda.get_device_properties(device_id)
return device_props.total_memory
@classmethod
def is_async_output_supported(cls, enforce_eager: Optional[bool]) -> bool:
    """Return whether async output processing can be used on this platform.

    Async output processing only pays off when CUDA graphs are in use, so
    it is disabled (with a warning) when ``enforce_eager`` is set on the
    V0 engine; V1 (``envs.VLLM_USE_V1``) is unaffected by this check.
    """
    # Only the V0 eager-mode combination rules async output out.
    supported = not (enforce_eager and not envs.VLLM_USE_V1)
    if not supported:
        logger.warning(
            "To see benefits of async output processing, enable CUDA "
            "graph. Since, enforce-eager is enabled, async output "
            "processor cannot be used")
    return supported
@classmethod
def check_and_update_config(cls, vllm_config: "VllmConfig") -> None:
from vllm.config.compilation import CUDAGraphMode