def patch_hf_vision_attn_for_rocm(model) -> None:
    """Force SDPA attention for HF vision encoder layers on ROCm.

    HF's flash_attention_2 has accuracy issues on ROCm that bypass
    torch.backends.cuda settings. This forces SDPA which then uses
    math_sdp via the pytest_collection_modifyitems settings.

    The patch is best-effort: it is a no-op on non-ROCm platforms, and any
    part of the expected module layout that is missing
    (``vision_embedding[0].encoder.layers[*].self_attn.vision_config``)
    is skipped instead of raising.

    Args:
        model: The HF model to patch in place. If it exposes a ``model``
            attribute, that inner object is patched instead.
    """
    if not current_platform.is_rocm():
        return

    # Unwrap one level when the object exposes the actual model as `.model`.
    inner = getattr(model, "model", model)

    vision_embedding = getattr(inner, "vision_embedding", None)
    if vision_embedding is None:
        return

    # The first entry of `vision_embedding` is treated as the vision tower;
    # an empty or non-indexable container simply means nothing to patch.
    try:
        vit = vision_embedding[0]
    except (IndexError, KeyError, TypeError):
        return

    layers = getattr(getattr(vit, "encoder", None), "layers", None)
    if layers is None:
        return

    for layer in layers:
        attn = getattr(layer, "self_attn", None)
        vision_config = getattr(attn, "vision_config", None)
        if vision_config is not None:
            # The attention module reads its implementation from this config.
            vision_config._attn_implementation = "sdpa"