[Misc] Remove redundant attention var constants (#29650)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Author:    Cyrus Leung
Date:      2025-11-28 20:35:19 +08:00
Committed: GitHub
Parent:    5c2b5cb422
Commit:    33b06a6f24
7 changed files with 19 additions and 63 deletions


@@ -9,7 +9,6 @@ from numbers import Number
 from typing import Any, NamedTuple
 from unittest.mock import patch
 
-import pytest
 import torch
 from torch._prims_common import TensorLikeType
@@ -17,9 +16,6 @@ from tests.kernels.quant_utils import native_w8a8_block_matmul
 from vllm.attention.backends.abstract import AttentionType
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.fused_moe.utils import moe_kernel_quantize_input
-from vllm.utils import (
-    STR_BACKEND_ENV_VAR,
-)
 from vllm.utils.torch_utils import make_tensor_with_pad
 
 # For now, disable "test_aot_dispatch_dynamic" since there are some
@@ -217,22 +213,6 @@ def make_causal_mask(
     return mask
 
 
-def override_backend_env_variable(
-    mpatch: pytest.MonkeyPatch, backend_name: str
-) -> None:
-    """
-    Override the environment variable indicating the vLLM backend temporarily,
-    using pytest monkeypatch to ensure that the env vars get
-    reset once the test context exits.
-
-    Arguments:
-
-    * mpatch: pytest monkeypatch instance
-    * backend_name: attention backend name to force
-    """
-    mpatch.setenv(STR_BACKEND_ENV_VAR, backend_name)
-
-
 def ref_masked_attention(
     query: torch.Tensor,
     key: torch.Tensor,
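
With the STR_BACKEND_ENV_VAR constant and the override_backend_env_variable helper removed, tests are expected to set the environment variable by its literal name. A minimal sketch of the replacement pattern (not part of this diff; the test name and backend value are illustrative, and STR_BACKEND_ENV_VAR previously resolved to "VLLM_ATTENTION_BACKEND"):

import pytest


def test_with_forced_backend(monkeypatch: pytest.MonkeyPatch) -> None:
    # Inline the literal name that STR_BACKEND_ENV_VAR used to hold;
    # monkeypatch restores the original value when the test exits,
    # matching the behavior of the removed helper.
    monkeypatch.setenv("VLLM_ATTENTION_BACKEND", "FLASH_ATTN")
    # ... exercise attention kernels under the forced backend ...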