[ROCm][CI] Prep Tests For Change To ROCM_ATTN As New Default Backend On ROCm (#36025)

Signed-off-by: Micah Williamson <micah.williamson@amd.com>
2026-03-09 13:27:55 -05:00
parent 3fd03f1ec2
commit 4ff9b045fe
10 changed files with 32 additions and 10 deletions
--- a/.buildkite/lm-eval-harness/test_lm_eval_correctness.py
+++ b/.buildkite/lm-eval-harness/test_lm_eval_correctness.py
@@ -13,9 +13,10 @@ import os
 from contextlib import contextmanager

 import lm_eval
-import numpy as np
 import yaml

+from vllm.platforms import current_platform
+
 DEFAULT_RTOL = 0.08


@@ -63,6 +64,9 @@ def launch_lm_eval(eval_config, tp_size):
        "allow_deprecated_quantization=True,"
    )

+    if current_platform.is_rocm() and "Nemotron-3" in eval_config["model_name"]:
+        model_args += "attention_backend=TRITON_ATTN"
+
    env_vars = eval_config.get("env_vars", None)
    with scoped_env_vars(env_vars):
        results = lm_eval.simple_evaluate(
@@ -102,6 +106,8 @@ def test_lm_eval_correctness_param(config_filename, tp_size):
                f"ground_truth={ground_truth:.3f} | "
                f"measured={measured_value:.3f} | rtol={rtol}"
            )
-            success = success and np.isclose(ground_truth, measured_value, rtol=rtol)
+
+            min_acceptable = ground_truth * (1 - rtol)
+            success = success and measured_value >= min_acceptable

    assert success
--- a/.buildkite/scripts/scheduled_integration_test/qwen3_next_mtp_async_eplb.sh
+++ b/.buildkite/scripts/scheduled_integration_test/qwen3_next_mtp_async_eplb.sh
@@ -24,7 +24,7 @@ if command -v rocm-smi &> /dev/null || [[ -d /opt/rocm ]] || [[ -n "${ROCM_PATH:
  BACKENDS=("allgather_reducescatter")
  # Disable MOE padding for ROCm since it is causing eplb to fail
  export VLLM_ROCM_MOE_PADDING=0
-  PLATFORM_ARGS=("--no-async-scheduling")
+  PLATFORM_ARGS=("--no-async-scheduling" "--attention-backend=TRITON_ATTN")
  echo "Disabled async scheduling for ROCm platform due to issues with spec decode."
 else
  # Non-ROCm platform (CUDA/other)
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@@ -529,7 +529,7 @@ steps:
  commands:
    - pip install tensorizer # for tensorizer test
    # for basic
-    - python3 basic/offline_inference/chat.py
+    - python3 basic/offline_inference/chat.py --attention-backend TRITON_ATTN
    - python3 basic/offline_inference/generate.py --model facebook/opt-125m
    - python3 basic/offline_inference/generate.py --model meta-llama/Llama-2-13b-chat-hf --cpu-offload-gb 10
    - python3 basic/offline_inference/classify.py
@@ -2208,7 +2208,7 @@ steps:
  commands:
    - pip install tensorizer # for tensorizer test
    # for basic
-    - python3 basic/offline_inference/chat.py
+    - python3 basic/offline_inference/chat.py --attention-backend TRITON_ATTN
    - python3 basic/offline_inference/generate.py --model facebook/opt-125m
    - python3 basic/offline_inference/generate.py --model meta-llama/Llama-2-13b-chat-hf --cpu-offload-gb 10
    - python3 basic/offline_inference/classify.py