diff --git a/.buildkite/lm-eval-harness/test_lm_eval_correctness.py b/.buildkite/lm-eval-harness/test_lm_eval_correctness.py
index a22abe73e..fad5f593b 100644
--- a/.buildkite/lm-eval-harness/test_lm_eval_correctness.py
+++ b/.buildkite/lm-eval-harness/test_lm_eval_correctness.py
@@ -13,9 +13,10 @@ import os
 from contextlib import contextmanager
 
 import lm_eval
-import numpy as np
 import yaml
 
+from vllm.platforms import current_platform
+
 DEFAULT_RTOL = 0.08
 
 
@@ -63,6 +64,9 @@ def launch_lm_eval(eval_config, tp_size):
         "allow_deprecated_quantization=True,"
     )
 
+    if current_platform.is_rocm() and "Nemotron-3" in eval_config["model_name"]:
+        model_args += "attention_backend=TRITON_ATTN"
+
     env_vars = eval_config.get("env_vars", None)
     with scoped_env_vars(env_vars):
         results = lm_eval.simple_evaluate(
@@ -102,6 +106,8 @@ def test_lm_eval_correctness_param(config_filename, tp_size):
                 f"ground_truth={ground_truth:.3f} | "
                 f"measured={measured_value:.3f} | rtol={rtol}"
             )
-            success = success and np.isclose(ground_truth, measured_value, rtol=rtol)
+
+            min_acceptable = ground_truth * (1 - rtol)
+            success = success and measured_value >= min_acceptable
 
     assert success
diff --git a/.buildkite/scripts/scheduled_integration_test/qwen3_next_mtp_async_eplb.sh b/.buildkite/scripts/scheduled_integration_test/qwen3_next_mtp_async_eplb.sh
index e875ac466..d587f26ae 100644
--- a/.buildkite/scripts/scheduled_integration_test/qwen3_next_mtp_async_eplb.sh
+++ b/.buildkite/scripts/scheduled_integration_test/qwen3_next_mtp_async_eplb.sh
@@ -24,7 +24,7 @@ if command -v rocm-smi &> /dev/null || [[ -d /opt/rocm ]] || [[ -n "${ROCM_PATH:
   BACKENDS=("allgather_reducescatter")
   # Disable MOE padding for ROCm since it is causing eplb to fail
   export VLLM_ROCM_MOE_PADDING=0
-  PLATFORM_ARGS=("--no-async-scheduling")
+  PLATFORM_ARGS=("--no-async-scheduling" "--attention-backend=TRITON_ATTN")
   echo "Disabled async scheduling for ROCm platform due to issues with spec decode."
 else
   # Non-ROCm platform (CUDA/other)
diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml
index ad11f3764..9e10a00db 100644
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@@ -529,7 +529,7 @@ steps:
   commands:
     - pip install tensorizer # for tensorizer test
     # for basic
-    - python3 basic/offline_inference/chat.py
+    - python3 basic/offline_inference/chat.py --attention-backend TRITON_ATTN
     - python3 basic/offline_inference/generate.py --model facebook/opt-125m
     - python3 basic/offline_inference/generate.py --model meta-llama/Llama-2-13b-chat-hf --cpu-offload-gb 10
     - python3 basic/offline_inference/classify.py
@@ -2208,7 +2208,7 @@ steps:
   commands:
     - pip install tensorizer # for tensorizer test
     # for basic
-    - python3 basic/offline_inference/chat.py
+    - python3 basic/offline_inference/chat.py --attention-backend TRITON_ATTN
     - python3 basic/offline_inference/generate.py --model facebook/opt-125m
     - python3 basic/offline_inference/generate.py --model meta-llama/Llama-2-13b-chat-hf --cpu-offload-gb 10
     - python3 basic/offline_inference/classify.py
diff --git a/tests/entrypoints/openai/test_tensorizer_entrypoint.py b/tests/entrypoints/openai/test_tensorizer_entrypoint.py
index 3cb64d50a..9ac9106db 100644
--- a/tests/entrypoints/openai/test_tensorizer_entrypoint.py
+++ b/tests/entrypoints/openai/test_tensorizer_entrypoint.py
@@ -15,6 +15,7 @@ from vllm.model_executor.model_loader.tensorizer import (
     tensorize_lora_adapter,
     tensorize_vllm_model,
 )
+from vllm.platforms import current_platform
 
 from ...utils import RemoteOpenAIServer
 
@@ -74,6 +75,8 @@ def server(model_uri, tensorize_model_and_lora):
         MODEL_NAME,
         "--enable-lora",
     ]
+    if current_platform.is_rocm():
+        args += ["--attention-backend", "TRITON_ATTN"]
 
     model_dir = os.path.dirname(model_uri)
     with RemoteOpenAIServer(model_dir, args) as remote_server:
diff --git a/tests/models/language/pooling_mteb_test/test_gte.py b/tests/models/language/pooling_mteb_test/test_gte.py
index f87fd832a..0c35d66c3 100644
--- a/tests/models/language/pooling_mteb_test/test_gte.py
+++ b/tests/models/language/pooling_mteb_test/test_gte.py
@@ -8,6 +8,7 @@ from tests.models.utils import (
     EmbedModelInfo,
     RerankModelInfo,
 )
+from vllm.platforms import current_platform
 
 from .mteb_embed_utils import mteb_test_embed_models
 from .mteb_score_utils import mteb_test_rerank_models
@@ -142,4 +143,9 @@ def test_embed_models_correctness(
 
 @pytest.mark.parametrize("model_info", RERANK_MODELS)
 def test_rerank_models_mteb(vllm_runner, model_info: RerankModelInfo) -> None:
-    mteb_test_rerank_models(vllm_runner, model_info)
+    vllm_extra_kwargs = {}
+    if current_platform.is_rocm():
+        vllm_extra_kwargs["attention_backend"] = "TRITON_ATTN"
+    mteb_test_rerank_models(
+        vllm_runner, model_info, vllm_extra_kwargs=vllm_extra_kwargs
+    )
diff --git a/tests/models/multimodal/generation/test_common.py b/tests/models/multimodal/generation/test_common.py
index c4b82b93e..979aa96af 100644
--- a/tests/models/multimodal/generation/test_common.py
+++ b/tests/models/multimodal/generation/test_common.py
@@ -173,6 +173,9 @@ VLM_TEST_SETTINGS = {
         marks=[
             pytest.mark.core_model,
         ],
+        vllm_runner_kwargs={"attention_backend": "TRITON_ATTN"}
+        if current_platform.is_rocm()
+        else {},
     ),
     "ultravox": VLMTestInfo(
         models=["fixie-ai/ultravox-v0_5-llama-3_2-1b"],
diff --git a/tests/test_regression.py b/tests/test_regression.py
index 2fc0308ff..ac82206f7 100644
--- a/tests/test_regression.py
+++ b/tests/test_regression.py
@@ -13,6 +13,7 @@ import pytest
 import torch
 
 from vllm import LLM, SamplingParams
+from vllm.platforms import current_platform
 
 
 @pytest.mark.skip(reason="In V1, we reject tokens > max_seq_len")
@@ -65,7 +66,8 @@ def test_model_from_modelscope(monkeypatch: pytest.MonkeyPatch):
         # Don't use HF_TOKEN for ModelScope repos, otherwise it will fail
         # with 400 Client Error: Bad Request.
         m.setenv("HF_TOKEN", "")
-        llm = LLM(model="qwen/Qwen1.5-0.5B-Chat")
+        attn_backend = "TRITON_ATTN" if current_platform.is_rocm() else "auto"
+        llm = LLM(model="qwen/Qwen1.5-0.5B-Chat", attention_backend=attn_backend)
 
         prompts = [
             "Hello, my name is",
diff --git a/tests/v1/e2e/test_kv_sharing_fast_prefill.py b/tests/v1/e2e/test_kv_sharing_fast_prefill.py
index f895fb72e..92b4d4532 100644
--- a/tests/v1/e2e/test_kv_sharing_fast_prefill.py
+++ b/tests/v1/e2e/test_kv_sharing_fast_prefill.py
@@ -91,6 +91,7 @@ def test_kv_sharing_fast_prefill(
             compilation_config=compilation_config,
             seed=SEED,
             kv_sharing_fast_prefill=kv_sharing_fast_prefill,
+            attention_backend="TRITON_ATTN",
         )
         responses = llm.generate(prompts, sampling_params)
         check_answers(
diff --git a/tests/v1/e2e/test_spec_decode.py b/tests/v1/e2e/test_spec_decode.py
index 3988070ca..8fdca83a2 100644
--- a/tests/v1/e2e/test_spec_decode.py
+++ b/tests/v1/e2e/test_spec_decode.py
@@ -732,11 +732,13 @@ def test_mtp_correctness(
         method, model_name, tp_size = model_setup
         _skip_if_insufficient_gpus_for_tp(tp_size)
 
+        attn_backend = "TRITON_ATTN" if current_platform.is_rocm() else "auto"
         ref_llm = LLM(
             model=model_name,
             max_model_len=2048,
             tensor_parallel_size=tp_size,
             trust_remote_code=True,
+            attention_backend=attn_backend,
         )
         ref_outputs = ref_llm.chat(test_prompts, sampling_config)
         evaluate_llm_for_gsm8k(
@@ -756,6 +758,7 @@ def test_mtp_correctness(
                 "max_model_len": 2048,
             },
             max_model_len=2048,
+            attention_backend=attn_backend,
         )
         evaluate_llm_for_gsm8k(
             spec_llm, expected_accuracy_threshold=expected_accuracy_threshold
diff --git a/tests/v1/sample/test_logprobs.py b/tests/v1/sample/test_logprobs.py
index df2fac85e..d029a6ce0 100644
--- a/tests/v1/sample/test_logprobs.py
+++ b/tests/v1/sample/test_logprobs.py
@@ -42,9 +42,7 @@ SAMPLE_PROMPT = BatchLogprobsComposition.SAMPLE_PROMPT
 # Force LLM instances into an identical, deterministic execution
 # mode so the test isolates spec-decode correctness only:
 ROCM_DETERMINISM_KWARGS: dict = (
-    dict(
-        max_num_seqs=1,
-    )
+    dict(max_num_seqs=1, attention_backend="TRITON_ATTN")
     if current_platform.is_rocm()
     else {}
 )