[ROCm][CI] Enable AITER Unified Attention On ROCm For gpt-oss Test (#32431)
Signed-off-by: Micah Williamson <micah.williamson@amd.com>
This commit is contained in:
@@ -10,6 +10,7 @@ import pytest
|
|||||||
import pytest_asyncio
|
import pytest_asyncio
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
|
|
||||||
|
from vllm._aiter_ops import is_aiter_found_and_supported
|
||||||
from vllm.config.multimodal import MultiModalConfig
|
from vllm.config.multimodal import MultiModalConfig
|
||||||
from vllm.entrypoints.openai.chat_completion.protocol import (
|
from vllm.entrypoints.openai.chat_completion.protocol import (
|
||||||
ChatCompletionRequest,
|
ChatCompletionRequest,
|
||||||
@@ -106,9 +107,21 @@ def gptoss_speculative_server(default_server_args: list[str]):
|
|||||||
"--speculative-config",
|
"--speculative-config",
|
||||||
f'{{"model": "{GPT_OSS_SPECULATOR_NAME}", '
|
f'{{"model": "{GPT_OSS_SPECULATOR_NAME}", '
|
||||||
f'"method": "eagle3", "num_speculative_tokens": 3}}',
|
f'"method": "eagle3", "num_speculative_tokens": 3}}',
|
||||||
"--attention-backend=TRITON_ATTN",
|
f"--attention-backend={
|
||||||
|
'TRITON_ATTN'
|
||||||
|
if not is_aiter_found_and_supported()
|
||||||
|
else 'ROCM_AITER_UNIFIED_ATTN'
|
||||||
|
}",
|
||||||
]
|
]
|
||||||
with RemoteOpenAIServer(GPT_OSS_MODEL_NAME, server_args) as remote_server:
|
# gpt-oss requires AITER unified attention on ROCm
|
||||||
|
# TODO: Remove after fixing TRITON_ATTN issue on ROCm
|
||||||
|
# https://github.com/vllm-project/vllm/issues/32434
|
||||||
|
env_dict = None
|
||||||
|
if is_aiter_found_and_supported():
|
||||||
|
env_dict = {"VLLM_ROCM_USE_AITER": "1"}
|
||||||
|
with RemoteOpenAIServer(
|
||||||
|
GPT_OSS_MODEL_NAME, server_args, env_dict=env_dict
|
||||||
|
) as remote_server:
|
||||||
yield remote_server
|
yield remote_server
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user