[Attention] Update tests to remove deprecated env vars (#30563)

Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
2025-12-17 12:49:59 -05:00
parent 9ca8cb38fd
commit 7eb6cb6c18
34 changed files with 580 additions and 447 deletions
--- a/tests/distributed/test_context_parallel.py
+++ b/tests/distributed/test_context_parallel.py
@@ -219,14 +219,12 @@ def _test_cp_gsm8k(
        ]
    )

-    server_env = {}
    if attn_backend:
-        server_env["VLLM_ATTENTION_BACKEND"] = attn_backend
+        server_args.append(f"--attention-backend={attn_backend}")

    with RemoteOpenAIServer(
        model_id,
        server_args,
-        env_dict=server_env,
        max_wait_seconds=720,
    ) as remote_server:
        host = f"http://{remote_server.host}"
--- a/tests/distributed/test_pp_cudagraph.py
+++ b/tests/distributed/test_pp_cudagraph.py
@@ -20,23 +20,21 @@ from ..utils import compare_two_settings, create_new_process_for_each_test
 )
@create_new_process_for_each_test()
 def test_pp_cudagraph(
-    monkeypatch: pytest.MonkeyPatch,
    PP_SIZE: int,
    MODEL_NAME: str,
    ATTN_BACKEND: LiteralString,
 ):
-    with monkeypatch.context() as m:
-        cudagraph_args = [
-            # use half precision for speed and memory savings in CI environment
-            "--dtype",
-            "float16",
-            "--pipeline-parallel-size",
-            str(PP_SIZE),
-            "--distributed-executor-backend",
-            "mp",
-        ]
-        m.setenv("VLLM_ATTENTION_BACKEND", ATTN_BACKEND)
+    cudagraph_args = [
+        # use half precision for speed and memory savings in CI environment
+        "--dtype",
+        "float16",
+        "--pipeline-parallel-size",
+        str(PP_SIZE),
+        "--distributed-executor-backend",
+        "mp",
+        f"--attention-backend={ATTN_BACKEND}",
+    ]

-        eager_args = cudagraph_args + ["--enforce-eager"]
+    eager_args = cudagraph_args + ["--enforce-eager"]

-        compare_two_settings(MODEL_NAME, eager_args, cudagraph_args)
+    compare_two_settings(MODEL_NAME, eager_args, cudagraph_args)