diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py index 03f0c1527..240724a67 100644 --- a/vllm/platforms/cuda.py +++ b/vllm/platforms/cuda.py @@ -99,7 +99,7 @@ class CudaPlatformBase(Platform): @classmethod def is_async_output_supported(cls, enforce_eager: Optional[bool]) -> bool: - if enforce_eager: + if enforce_eager and not envs.VLLM_USE_V1: logger.warning( "To see benefits of async output processing, enable CUDA " "graph. Since, enforce-eager is enabled, async output " diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py index 04637f5c7..e9e18d3fe 100644 --- a/vllm/platforms/rocm.py +++ b/vllm/platforms/rocm.py @@ -299,7 +299,7 @@ class RocmPlatform(Platform): @classmethod def is_async_output_supported(cls, enforce_eager: Optional[bool]) -> bool: - if enforce_eager: + if enforce_eager and not envs.VLLM_USE_V1: logger.warning( "To see benefits of async output processing, enable CUDA " "graph. Since, enforce-eager is enabled, async output "