From 179547d62c73e7174bf42b8ca0a34177ac3a5c9e Mon Sep 17 00:00:00 2001 From: Andreas Karatzas Date: Mon, 9 Mar 2026 19:55:20 -0500 Subject: [PATCH] [ROCm][CI] Fix ROCm GPT-OSS Eval test group (#36179) Signed-off-by: Andreas Karatzas --- .buildkite/test-amd.yaml | 8 ++++---- .../evals/gpt_oss/configs/gpt-oss-20b-rocm-baseline.yaml | 6 ++++++ tests/evals/gpt_oss/configs/models-gfx942.txt | 3 +++ tests/evals/gpt_oss/configs/models-gfx950.txt | 3 +++ 4 files changed, 16 insertions(+), 4 deletions(-) create mode 100644 tests/evals/gpt_oss/configs/gpt-oss-20b-rocm-baseline.yaml create mode 100644 tests/evals/gpt_oss/configs/models-gfx942.txt create mode 100644 tests/evals/gpt_oss/configs/models-gfx950.txt diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index 9e10a00db..91ceda2f6 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -1639,8 +1639,8 @@ steps: - vllm/model_executor/layers/quantization/mxfp4.py - vllm/v1/attention/backends/flashinfer.py commands: - - uv pip install --system 'gpt-oss[eval]==0.0.5' - - VLLM_ROCM_USE_AITER_MHA=0 VLLM_ROCM_USE_AITER=1 VLLM_USE_AITER_UNIFIED_ATTENTION=1 pytest -s -v tests/evals/gpt_oss/test_gpqa_correctness.py --model openai/gpt-oss-20b --metric 0.58 + - uv pip install --system 'gpt-oss[eval]==0.0.5' + - pytest -s -v evals/gpt_oss/test_gpqa_correctness.py --config-list-file=configs/models-gfx942.txt ##### EPLB Accuracy Tests ##### - label: DeepSeek V2-Lite Accuracy @@ -3296,8 +3296,8 @@ steps: - vllm/model_executor/layers/quantization/mxfp4.py - vllm/v1/attention/backends/flashinfer.py commands: - - uv pip install --system 'gpt-oss[eval]==0.0.5' - - VLLM_ROCM_USE_AITER_MHA=0 VLLM_ROCM_USE_AITER=1 VLLM_USE_AITER_UNIFIED_ATTENTION=1 pytest -s -v tests/evals/gpt_oss/test_gpqa_correctness.py --model openai/gpt-oss-20b --metric 0.58 + - uv pip install --system 'gpt-oss[eval]==0.0.5' + - pytest -s -v evals/gpt_oss/test_gpqa_correctness.py --config-list-file=configs/models-gfx950.txt ##### EPLB Accuracy Tests ##### - label: DeepSeek V2-Lite Accuracy diff --git a/tests/evals/gpt_oss/configs/gpt-oss-20b-rocm-baseline.yaml b/tests/evals/gpt_oss/configs/gpt-oss-20b-rocm-baseline.yaml new file mode 100644 index 000000000..76b1d7962 --- /dev/null +++ b/tests/evals/gpt_oss/configs/gpt-oss-20b-rocm-baseline.yaml @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +model_name: openai/gpt-oss-20b +metric_threshold: 0.568 +reasoning_effort: low +server_args: "--attention-backend ROCM_AITER_UNIFIED_ATTN" \ No newline at end of file diff --git a/tests/evals/gpt_oss/configs/models-gfx942.txt b/tests/evals/gpt_oss/configs/models-gfx942.txt new file mode 100644 index 000000000..48cef0122 --- /dev/null +++ b/tests/evals/gpt_oss/configs/models-gfx942.txt @@ -0,0 +1,3 @@ +# GFX942 model configurations for GPQA evaluation +# Tests different environment variable combinations +gpt-oss-20b-rocm-baseline.yaml \ No newline at end of file diff --git a/tests/evals/gpt_oss/configs/models-gfx950.txt b/tests/evals/gpt_oss/configs/models-gfx950.txt new file mode 100644 index 000000000..2b6ff4f4a --- /dev/null +++ b/tests/evals/gpt_oss/configs/models-gfx950.txt @@ -0,0 +1,3 @@ +# GFX950 model configurations for GPQA evaluation +# Tests different environment variable combinations +gpt-oss-20b-rocm-baseline.yaml \ No newline at end of file