From 46f8a982b191e3a3d3a1eccaf18b184c391ac2ac Mon Sep 17 00:00:00 2001
From: Micah Williamson
Date: Thu, 15 Jan 2026 18:55:57 -0600
Subject: [PATCH] [ROCm][CI] Enable AITER Unified Attention On ROCm For gpt-oss
 Test (#32431)

Signed-off-by: Micah Williamson
---
 tests/entrypoints/openai/test_serving_chat.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/tests/entrypoints/openai/test_serving_chat.py b/tests/entrypoints/openai/test_serving_chat.py
index 2e0b0a63f..a6995e4ca 100644
--- a/tests/entrypoints/openai/test_serving_chat.py
+++ b/tests/entrypoints/openai/test_serving_chat.py
@@ -10,6 +10,7 @@
 import pytest
 import pytest_asyncio
 from openai import OpenAI
+from vllm._aiter_ops import is_aiter_found_and_supported
 from vllm.config.multimodal import MultiModalConfig
 from vllm.entrypoints.openai.chat_completion.protocol import (
     ChatCompletionRequest,
@@ -106,9 +107,21 @@ def gptoss_speculative_server(default_server_args: list[str]):
         "--speculative-config",
         f'{{"model": "{GPT_OSS_SPECULATOR_NAME}", '
         f'"method": "eagle3", "num_speculative_tokens": 3}}',
-        "--attention-backend=TRITON_ATTN",
+        f"--attention-backend={
+            'TRITON_ATTN'
+            if not is_aiter_found_and_supported()
+            else 'ROCM_AITER_UNIFIED_ATTN'
+        }",
     ]
-    with RemoteOpenAIServer(GPT_OSS_MODEL_NAME, server_args) as remote_server:
+    # gpt-oss requires AITER unified attention on ROCm
+    # TODO: Remove after fixing TRITON_ATTN issue on ROCm
+    # https://github.com/vllm-project/vllm/issues/32434
+    env_dict = None
+    if is_aiter_found_and_supported():
+        env_dict = {"VLLM_ROCM_USE_AITER": "1"}
+    with RemoteOpenAIServer(
+        GPT_OSS_MODEL_NAME, server_args, env_dict=env_dict
+    ) as remote_server:
         yield remote_server