# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Pytest configuration for vLLM language generation tests."""

import warnings

import torch

from vllm.platforms import current_platform


def pytest_sessionstart(session):
    """Apply ROCm-only SDP backend settings before the test session starts.

    On any non-ROCm platform this hook is a no-op. On ROCm it turns off the
    flash and memory-efficient scaled-dot-product backends, turns on the math
    backend, and emits a ``UserWarning`` describing the change.

    Args:
        session: The pytest session object passed to the hook (unused here).
    """
    if current_platform.is_rocm():
        # Flash/MemEfficient SDP are disabled on ROCm to avoid HF Transformers
        # accuracy issues: https://github.com/vllm-project/vllm/issues/30167
        # TODO: Remove once ROCm SDP accuracy issues are resolved on
        # HuggingFace
        sdp_backends = torch.backends.cuda
        sdp_backends.enable_flash_sdp(False)
        sdp_backends.enable_mem_efficient_sdp(False)
        sdp_backends.enable_math_sdp(True)

        notice = (
            "ROCm: Disabled flash_sdp and mem_efficient_sdp, enabled math_sdp "
            "to avoid HuggingFace Transformers accuracy issues"
        )
        warnings.warn(notice, UserWarning, stacklevel=1)