[Attention] Update tests to remove deprecated env vars (#30563)

Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
2025-12-17 12:49:59 -05:00
parent 9ca8cb38fd
commit 7eb6cb6c18
34 changed files with 580 additions and 447 deletions
--- a/tests/models/test_initialization.py
+++ b/tests/models/test_initialization.py
@@ -108,11 +108,12 @@ def can_initialize(
        patch.object(V1EngineCore, "_initialize_kv_caches", _initialize_kv_caches_v1),
        monkeypatch.context() as m,
    ):
-        if model_arch == "GptOssForCausalLM":
-            # FIXME: A hack to bypass FA3 assertion because our CI's L4 GPU
-            # has cc==8.9 which hasn't supported FA3 yet. Remove this hack when
-            # L4 supports FA3.
-            m.setenv("VLLM_ATTENTION_BACKEND", "TRITON_ATTN")
+        # FIXME: A hack to bypass FA3 assertion because our CI's L4 GPU
+        # has cc==8.9 which hasn't supported FA3 yet. Remove this hack when
+        # L4 supports FA3.
+        attention_config = (
+            {"backend": "TRITON_ATTN"} if model_arch == "GptOssForCausalLM" else None
+        )
        if model_arch == "WhisperForConditionalGeneration":
            m.setenv("VLLM_WORKER_MULTIPROC_METHOD", "spawn")

@@ -143,6 +144,7 @@ def can_initialize(
            else "vllm",
            hf_overrides=hf_overrides_fn,
            max_num_seqs=model_info.max_num_seqs,
+            attention_config=attention_config,
        )