# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Pytest configuration for vLLM language generation tests."""

import warnings

import torch

from vllm.platforms import current_platform


def pytest_sessionstart(session):
    """Apply ROCm-only attention backend settings at session start.

    On non-ROCm platforms this hook does nothing. On ROCm it turns off the
    flash and memory-efficient scaled-dot-product-attention (SDP) backends in
    PyTorch, turns the math backend on, and emits a ``UserWarning`` so the
    change is visible in the test output.

    Args:
        session: The pytest session object passed to the hook (unused).
    """
    if current_platform.is_rocm():
        # Flash/MemEfficient SDP on ROCm leads to HF Transformers accuracy
        # issues: https://github.com/vllm-project/vllm/issues/30167
        # TODO: Remove once ROCm SDP accuracy issues are resolved on
        # HuggingFace
        sdp_backends = torch.backends.cuda
        sdp_backends.enable_flash_sdp(False)
        sdp_backends.enable_mem_efficient_sdp(False)
        sdp_backends.enable_math_sdp(True)

        notice = (
            "ROCm: Disabled flash_sdp and mem_efficient_sdp, enabled math_sdp "
            "to avoid HuggingFace Transformers accuracy issues"
        )
        warnings.warn(notice, UserWarning, stacklevel=1)