diff --git a/tests/v1/e2e/test_async_scheduling.py b/tests/v1/e2e/test_async_scheduling.py
index c703d6aae..a54b612f7 100644
--- a/tests/v1/e2e/test_async_scheduling.py
+++ b/tests/v1/e2e/test_async_scheduling.py
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import os
 from itertools import repeat
 from typing import Any
 
@@ -19,6 +20,8 @@ from ...models.utils import check_outputs_equal
 
 MODEL = "Qwen/Qwen3-0.6B"
 MTP_MODEL = "meta-llama/Llama-3.2-1B-Instruct"
+# Need to enforce eager for MRV2 while we sort out cudagraph issues.
+ENFORCE_EAGER = os.getenv("ENFORCE_EAGER", "0") == "1"
 
 first_prompt = (
     "The following numbers of the sequence "
@@ -47,10 +50,10 @@ def test_without_spec_decoding(
     test_sampling_params: list[dict[str, Any]] = [
         dict(),
         # dict(min_tokens=20),
-        dict(presence_penalty=-1.0),
+        dict(frequency_penalty=-1.0),
         dict(bad_words=["the", " the"]),
         dict(logprobs=2),
-        dict(logprobs=2, presence_penalty=-1.0),
+        dict(logprobs=2, frequency_penalty=-1.0),
         dict(structured_outputs=struct_outputs),
         dict(
             structured_outputs=struct_outputs,
@@ -58,12 +61,12 @@
         ),
         dict(
             structured_outputs=struct_outputs,
-            presence_penalty=-1.0,
+            frequency_penalty=-1.0,
         ),
         dict(
             structured_outputs=struct_outputs,
             logprobs=2,
-            presence_penalty=-1.0,
+            frequency_penalty=-1.0,
         ),
     ]
 
@@ -116,15 +119,15 @@ def test_with_eagle3_spec_decoding(sample_json_schema, monkeypatch: pytest.Monke
 
     test_sampling_params = [
         dict(),
-        dict(presence_penalty=-1.0),
+        dict(frequency_penalty=-1.0),
         dict(bad_words=["the", " the"]),
         dict(logprobs=2),
-        dict(logprobs=2, presence_penalty=-1.0),
+        dict(logprobs=2, frequency_penalty=-1.0),
         dict(structured_outputs=struct_outputs),
         dict(
             structured_outputs=struct_outputs,
             logprobs=2,
-            presence_penalty=-1.0,
+            frequency_penalty=-1.0,
         ),
     ]
 
@@ -144,14 +147,7 @@ def test_with_eagle3_spec_decoding(sample_json_schema, monkeypatch: pytest.Monke
         (True, "uni", True, spec_config_short, True),
     ]
 
-    # On ROCm, use TRITON_ATTN + float32 for better numerical consistency
-    run_tests(
-        monkeypatch,
-        MTP_MODEL,
-        test_configs,
-        test_sampling_params,
-        is_testing_with_spec_decoding=True,
-    )
+    run_tests(monkeypatch, MTP_MODEL, test_configs, test_sampling_params)
 
 
 def test_with_ngram_gpu_spec_decoding(monkeypatch: pytest.MonkeyPatch):
@@ -196,12 +192,11 @@ def run_tests(
     model: str,
     test_configs: list[tuple],
     test_sampling_params: list[dict[str, Any]],
-    is_testing_with_spec_decoding: bool = False,
 ):
     """Test consistency of combos of async scheduling, preemption,
     uni/multiproc executor with spec decoding."""
 
-    # Determine attention config based on platform
+    # Flex attention supports float32.
     attention_config = {"backend": "FLEX_ATTENTION"}
 
     with monkeypatch.context() as m:
@@ -226,7 +221,6 @@ def run_tests(
                     async_scheduling,
                     spec_config,
                     test_prefill_chunking=test_prefill_chunking,
-                    is_testing_with_spec_decoding=is_testing_with_spec_decoding,
                     attention_config=attention_config,
                 )
                 outputs.append(test_results)
@@ -250,6 +244,7 @@ def run_tests(
             test_acceptance_rates or repeat(None),
             test_sampling_params,
         ):
+            reason = None
             try:
                 check_outputs_equal(
                     outputs_0_lst=base_outs,
@@ -257,42 +252,57 @@ def run_tests(
                     name_0=f"baseline=[{baseline_config}], params={params}",
                     name_1=f"config=[{test_config}], params={params}",
                 )
+            except AssertionError as e:
+                reason = "outputs ", e
 
-            assert _all_logprobs_match(base_logprobs, test_logprobs)
+            if reason is None:
+                try:
+                    assert _all_logprobs_match(base_logprobs, test_logprobs)
+                except AssertionError as e:
+                    reason = "logprobs", e
 
-            if (
-                base_acceptance_rate is not None
-                and test_acceptance_rate is not None
-            ):
-                if "spec_mml=None" in test_config:
-                    # Preemption causes more variance in acceptance rates
-                    if (
-                        current_platform.is_rocm()
-                        and "preemption=True" in test_config
-                    ):
-                        tolerance = 0.10
+            if reason is None:
+                try:
+                    if (
+                        base_acceptance_rate is not None
+                        and test_acceptance_rate is not None
+                    ):
+                        if "spec_mml=None" in test_config:
+                            # Preemption causes more variance in acceptance rates
+                            if (
+                                current_platform.is_rocm()
+                                and "preemption=True" in test_config
+                            ):
+                                tolerance = 0.10
+                            else:
+                                tolerance = 0.05
+                            assert (
+                                test_acceptance_rate > base_acceptance_rate
+                                or test_acceptance_rate
+                                == pytest.approx(base_acceptance_rate, rel=tolerance)
+                            )
                         else:
-                        tolerance = 0.05
-                        assert (
-                            test_acceptance_rate > base_acceptance_rate
-                            or test_acceptance_rate
-                            == pytest.approx(base_acceptance_rate, rel=tolerance)
-                        )
-                else:
-                    # Currently the reported acceptance rate is expected to be
-                    # lower when we sometimes skip drafting altogether.
-                    assert test_acceptance_rate > 0.1
+                            # Currently the reported acceptance rate is expected to be
+                            # lower when we sometimes skip drafting altogether.
+                            assert test_acceptance_rate > 0.1
+                except AssertionError as e:
+                    reason = "accept ", e
+
+            if reason is None:
                 print(
-                    f"PASSED: config=[{test_config}], params={params}"
+                    f"\033[32mPASSED\033[0m: "
+                    f"config=[{test_config}], params={params}"
                     f" accept_rate={test_acceptance_rate}"
                 )
-            except AssertionError as e:
+            else:
+                reason_str, _ = reason
                 print(
-                    f"FAILED: config=[{test_config}], params={params}"
+                    f"\033[31mFAILED\033[0m({reason_str}): "
+                    f"config=[{test_config}], params={params}"
                     f" accept_rate={test_acceptance_rate}"
                 )
                 if failure is None:
-                    failure = e
+                    _, failure = reason
 
     if failure is not None:
         raise failure
@@ -307,7 +317,6 @@ def run_test(
     async_scheduling: bool,
     spec_config: dict[str, Any] | None,
     test_prefill_chunking: bool,
-    is_testing_with_spec_decoding: bool = False,
     attention_config: dict[str, Any] | None = None,
 ):
     spec_decoding = spec_config is not None
@@ -335,7 +344,7 @@ def run_test(
         enable_chunked_prefill=test_prefill_chunking,
         # Force prefill chunking
         max_num_batched_tokens=48 if test_prefill_chunking else None,
-        # enforce_eager=True,
+        enforce_eager=ENFORCE_EAGER,
         async_scheduling=async_scheduling,
         distributed_executor_backend=executor,
         dtype="float32",
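
Not part of the patch, but a quick standalone sketch for reviewers of the two behaviors the diff relies on: the new `ENFORCE_EAGER` toggle treats only the exact string `"1"` as true, and the acceptance-rate check uses `pytest.approx` with a relative tolerance. `parse_enforce_eager` below is a hypothetical helper written only to illustrate the parsing rule:

```python
import pytest


def parse_enforce_eager(value: str | None) -> bool:
    # Mirrors the patch's rule: only the exact string "1" enables eager mode.
    return (value or "0") == "1"


assert parse_enforce_eager(None) is False    # unset -> CUDA graphs stay enabled
assert parse_enforce_eager("1") is True      # ENFORCE_EAGER=1 -> enforce eager
assert parse_enforce_eager("true") is False  # anything but "1" is ignored

# pytest.approx(expected, rel=r) treats values within r * |expected| as equal:
assert 0.97 == pytest.approx(1.0, rel=0.05)  # off by 0.03 <= 0.05 -> equal
assert 0.90 != pytest.approx(1.0, rel=0.05)  # off by 0.10 >  0.05 -> not equal
```

In other words, a test config passes the acceptance check when its rate beats the baseline outright or lands within 5% of it (10% on ROCm runs with preemption), and the suite can be run as `ENFORCE_EAGER=1 pytest tests/v1/e2e/test_async_scheduling.py` to force eager execution while the cudagraph issues are being sorted out.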