From 46f8a982b191e3a3d3a1eccaf18b184c391ac2ac Mon Sep 17 00:00:00 2001
From: Micah Williamson
Date: Thu, 15 Jan 2026 18:55:57 -0600
Subject: [PATCH] [ROCm][CI] Enable AITER Unified Attention On ROCm For gpt-oss
 Test (#32431)

Signed-off-by: Micah Williamson
---
 tests/entrypoints/openai/test_serving_chat.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/tests/entrypoints/openai/test_serving_chat.py b/tests/entrypoints/openai/test_serving_chat.py
index 2e0b0a63f..a6995e4ca 100644
--- a/tests/entrypoints/openai/test_serving_chat.py
+++ b/tests/entrypoints/openai/test_serving_chat.py
@@ -10,6 +10,7 @@
 import pytest
 import pytest_asyncio
 from openai import OpenAI
+from vllm._aiter_ops import is_aiter_found_and_supported
 from vllm.config.multimodal import MultiModalConfig
 from vllm.entrypoints.openai.chat_completion.protocol import (
     ChatCompletionRequest,
@@ -106,9 +107,21 @@ def gptoss_speculative_server(default_server_args: list[str]):
         "--speculative-config",
         f'{{"model": "{GPT_OSS_SPECULATOR_NAME}", '
         f'"method": "eagle3", "num_speculative_tokens": 3}}',
-        "--attention-backend=TRITON_ATTN",
+        f"--attention-backend={
+            'TRITON_ATTN'
+            if not is_aiter_found_and_supported()
+            else 'ROCM_AITER_UNIFIED_ATTN'
+        }",
     ]
-    with RemoteOpenAIServer(GPT_OSS_MODEL_NAME, server_args) as remote_server:
+    # gpt-oss requires AITER unified attention on ROCm
+    # TODO: Remove after fixing TRITON_ATTN issue on ROCm
+    # https://github.com/vllm-project/vllm/issues/32434
+    env_dict = None
+    if is_aiter_found_and_supported():
+        env_dict = {"VLLM_ROCM_USE_AITER": "1"}
+    with RemoteOpenAIServer(
+        GPT_OSS_MODEL_NAME, server_args, env_dict=env_dict
+    ) as remote_server:
         yield remote_server