diff --git a/tests/kernels/attention/test_attention_selector.py b/tests/kernels/attention/test_attention_selector.py
index cd34b520e..c959b2f4b 100644
--- a/tests/kernels/attention/test_attention_selector.py
+++ b/tests/kernels/attention/test_attention_selector.py
@@ -11,7 +11,6 @@ from vllm.platforms import current_platform
 from vllm.platforms.cpu import CpuPlatform
 from vllm.platforms.cuda import CudaPlatform
 from vllm.platforms.rocm import RocmPlatform
-from vllm.utils import STR_BACKEND_ENV_VAR, STR_FLASH_ATTN_VAL, STR_INVALID_VAL
 
 
 @pytest.fixture(autouse=True)
@@ -83,7 +82,7 @@ def test_env(
 ):
     """Test attention backend selection with valid device-backend pairs."""
     with monkeypatch.context() as m:
-        m.setenv(STR_BACKEND_ENV_VAR, name)
+        m.setenv("VLLM_ATTENTION_BACKEND", name)
         m.setenv("VLLM_MLA_DISABLE", "1" if use_mla else "0")
 
         if device == "cpu":
@@ -237,27 +236,27 @@ def test_flash_attn(monkeypatch: pytest.MonkeyPatch):
     )
 
     with monkeypatch.context() as m:
-        m.setenv(STR_BACKEND_ENV_VAR, STR_FLASH_ATTN_VAL)
+        m.setenv("VLLM_ATTENTION_BACKEND", "FLASH_ATTN")
 
         # Unsupported CUDA arch
         monkeypatch.setattr(torch.cuda, "get_device_capability", lambda _=None: (7, 5))
         backend = get_attn_backend(16, torch.float16, None, 16)
-        assert backend.get_name() != STR_FLASH_ATTN_VAL
+        assert backend.get_name() != "FLASH_ATTN"
 
         # Reset the monkeypatch for subsequent tests
         monkeypatch.undo()
 
         # Unsupported data type
         backend = get_attn_backend(16, torch.float8_e4m3fn, None, 16)
-        assert backend.get_name() != STR_FLASH_ATTN_VAL
+        assert backend.get_name() != "FLASH_ATTN"
 
         # Unsupported kv cache data type
         backend = get_attn_backend(16, torch.float16, "fp8", 16)
-        assert backend.get_name() != STR_FLASH_ATTN_VAL
+        assert backend.get_name() != "FLASH_ATTN"
 
         # Unsupported block size
         backend = get_attn_backend(16, torch.float16, None, 8)
-        assert backend.get_name() != STR_FLASH_ATTN_VAL
+        assert backend.get_name() != "FLASH_ATTN"
 
         # flash-attn is not installed
         import sys
@@ -265,7 +264,7 @@ def test_flash_attn(monkeypatch: pytest.MonkeyPatch):
         original_module = sys.modules.get("vllm_flash_attn")
         monkeypatch.setitem(sys.modules, "vllm_flash_attn", None)
         backend = get_attn_backend(16, torch.float16, None, 16)
-        assert backend.get_name() != STR_FLASH_ATTN_VAL
+        assert backend.get_name() != "FLASH_ATTN"
 
         # Restore the original module if it existed
         if original_module is not None:
@@ -275,7 +274,7 @@ def test_flash_attn(monkeypatch: pytest.MonkeyPatch):
 
         # Unsupported head size
         backend = get_attn_backend(17, torch.float16, None, 16)
-        assert backend.get_name() != STR_FLASH_ATTN_VAL
+        assert backend.get_name() != "FLASH_ATTN"
 
 
 def test_invalid_env(monkeypatch: pytest.MonkeyPatch):
@@ -284,7 +283,7 @@ def test_invalid_env(monkeypatch: pytest.MonkeyPatch):
         monkeypatch.context() as m,
         patch("vllm.platforms.current_platform", CudaPlatform()),
     ):
-        m.setenv(STR_BACKEND_ENV_VAR, STR_INVALID_VAL)
+        m.setenv("VLLM_ATTENTION_BACKEND", "INVALID")
 
         # Should raise ValueError for invalid backend
         with pytest.raises(ValueError) as exc_info:
diff --git a/tests/kernels/attention/test_rocm_attention_selector.py b/tests/kernels/attention/test_rocm_attention_selector.py
index 9b7fb6649..b61058081 100644
--- a/tests/kernels/attention/test_rocm_attention_selector.py
+++ b/tests/kernels/attention/test_rocm_attention_selector.py
@@ -6,7 +6,6 @@ import torch
 
 from vllm.attention.selector import _cached_get_attn_backend, get_attn_backend
 from vllm.platforms.rocm import RocmPlatform
-from vllm.utils import STR_BACKEND_ENV_VAR
 
 
 @pytest.fixture(autouse=True)
@@ -18,7 +17,7 @@ def clear_cache():
 @pytest.mark.skip(reason="Skipped for now. Should be revisited.")
 def test_selector(monkeypatch: pytest.MonkeyPatch):
     with monkeypatch.context() as m:
-        m.setenv(STR_BACKEND_ENV_VAR, "ROCM_ATTN")
+        m.setenv("VLLM_ATTENTION_BACKEND", "ROCM_ATTN")
 
         # Set the current platform to ROCm using monkeypatch
         monkeypatch.setattr("vllm.attention.selector.current_platform", RocmPlatform())
@@ -30,19 +29,19 @@ def test_selector(monkeypatch: pytest.MonkeyPatch):
         # MLA test for deepseek related
 
         # change the attention backend to triton MLA
-        m.setenv(STR_BACKEND_ENV_VAR, "TRITON_MLA")
+        m.setenv("VLLM_ATTENTION_BACKEND", "TRITON_MLA")
         backend = get_attn_backend(576, torch.bfloat16, "auto", 16, False, use_mla=True)
         assert backend.get_name() == "TRITON_MLA"
 
         # If attention backend is None
         # If use_mla is true
         # The selected backend is triton MLA
-        m.setenv(STR_BACKEND_ENV_VAR, None)
+        m.setenv("VLLM_ATTENTION_BACKEND", "")
         backend = get_attn_backend(576, torch.bfloat16, "auto", 16, False, use_mla=True)
         assert backend.get_name() == "TRITON_MLA"
 
         # change the attention backend to AITER MLA
-        m.setenv(STR_BACKEND_ENV_VAR, "ROCM_AITER_MLA")
+        m.setenv("VLLM_ATTENTION_BACKEND", "ROCM_AITER_MLA")
         backend = get_attn_backend(576, torch.bfloat16, "auto", 1, False, use_mla=True)
         assert backend.get_name() == "ROCM_AITER_MLA"
 
@@ -50,7 +49,7 @@ def test_selector(monkeypatch: pytest.MonkeyPatch):
         # If use_mla is true
         # If VLLM_ROCM_USE_AITER is enabled
         # The selected backend is ROCM_AITER_MLA
-        m.setenv(STR_BACKEND_ENV_VAR, None)
+        m.setenv("VLLM_ATTENTION_BACKEND", "")
         m.setenv("VLLM_ROCM_USE_AITER", "1")
         backend = get_attn_backend(576, torch.bfloat16, "auto", 1, False, use_mla=True)
         assert backend.get_name() == "ROCM_AITER_MLA"
diff --git a/tests/kernels/utils.py b/tests/kernels/utils.py
index b8148ce06..75e82f931 100644
--- a/tests/kernels/utils.py
+++ b/tests/kernels/utils.py
@@ -9,7 +9,6 @@ from numbers import Number
 from typing import Any, NamedTuple
 from unittest.mock import patch
 
-import pytest
 import torch
 from torch._prims_common import TensorLikeType
 
@@ -17,9 +16,6 @@ from tests.kernels.quant_utils import native_w8a8_block_matmul
 from vllm.attention.backends.abstract import AttentionType
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.fused_moe.utils import moe_kernel_quantize_input
-from vllm.utils import (
-    STR_BACKEND_ENV_VAR,
-)
 from vllm.utils.torch_utils import make_tensor_with_pad
 
 # For now, disable "test_aot_dispatch_dynamic" since there are some
@@ -217,22 +213,6 @@ def make_causal_mask(
     return mask
 
 
-def override_backend_env_variable(
-    mpatch: pytest.MonkeyPatch, backend_name: str
-) -> None:
-    """
-    Override the environment variable indicating the vLLM backend temporarily,
-    using pytest monkeypatch to ensure that the env vars get
-    reset once the test context exits.
-
-    Arguments:
-
-    * mpatch: pytest monkeypatch instance
-    * backend_name: attention backend name to force
-    """
-    mpatch.setenv(STR_BACKEND_ENV_VAR, backend_name)
-
-
 def ref_masked_attention(
     query: torch.Tensor,
     key: torch.Tensor,
diff --git a/tests/models/quantization/test_fp8.py b/tests/models/quantization/test_fp8.py
index 2a6f34a9c..7dfedaf27 100644
--- a/tests/models/quantization/test_fp8.py
+++ b/tests/models/quantization/test_fp8.py
@@ -11,7 +11,6 @@ import pytest
 from tests.quantization.utils import is_quant_method_supported
 from vllm.attention.utils.fa_utils import flash_attn_supports_fp8
 from vllm.platforms import current_platform
-from vllm.utils import STR_BACKEND_ENV_VAR
 from ..utils import check_logprobs_close
 
 
@@ -76,7 +75,7 @@ def test_models(
 
     with monkeypatch.context() as m:
         m.setenv("TOKENIZERS_PARALLELISM", "true")
-        m.setenv(STR_BACKEND_ENV_VAR, backend)
+        m.setenv("VLLM_ATTENTION_BACKEND", backend)
 
         MAX_MODEL_LEN = 1024
         NUM_LOG_PROBS = 8
diff --git a/vllm/attention/selector.py b/vllm/attention/selector.py
index ad19b58aa..a7190df3c 100644
--- a/vllm/attention/selector.py
+++ b/vllm/attention/selector.py
@@ -19,7 +19,6 @@ from vllm.attention.backends.registry import (
 )
 from vllm.config.cache import CacheDType
 from vllm.logger import init_logger
-from vllm.utils import STR_BACKEND_ENV_VAR
 from vllm.utils.import_utils import resolve_obj_by_qualname
 
 logger = init_logger(__name__)
@@ -35,7 +34,7 @@ def get_env_variable_attn_backend() -> AttentionBackendEnum | None:
     * AttentionBackendEnum value if an override is specified
     * None otherwise
     """
-    backend_name = os.environ.get(STR_BACKEND_ENV_VAR)
+    backend_name = os.environ.get("VLLM_ATTENTION_BACKEND")
     if backend_name is None:
         return None
     if backend_name == "XFORMERS":
@@ -139,10 +138,10 @@ def _cached_get_attn_backend(
             if backend_by_env_var.endswith("_VLLM_V1"):
                 logger.warning(
                     "The suffix '_VLLM_V1' in the environment variable "
-                    "%s is no longer necessary as V0 backends have been "
-                    "deprecated. Please remove this suffix from your "
+                    "VLLM_ATTENTION_BACKEND is no longer necessary as "
+                    "V0 backends have been deprecated. "
+                    "Please remove this suffix from your "
                     "environment variable setting.",
-                    STR_BACKEND_ENV_VAR,
                 )
                 backend_by_env_var = backend_by_env_var.removesuffix("_VLLM_V1")
             try:
diff --git a/vllm/model_executor/models/deepseek_eagle.py b/vllm/model_executor/models/deepseek_eagle.py
index 4d7a37292..8f6b4a4b0 100644
--- a/vllm/model_executor/models/deepseek_eagle.py
+++ b/vllm/model_executor/models/deepseek_eagle.py
@@ -23,12 +23,9 @@ from vllm.model_executor.models.deepseek_v2 import (
     DeepseekV2DecoderLayer,
     DeepseekV3ForCausalLM,
 )
-from vllm.utils import init_logger
 
 from .utils import AutoWeightsLoader, maybe_prefix, process_eagle_weight
 
-logger = init_logger(__name__)
-
 
 @support_torch_compile
 class DeepseekV2Model(nn.Module):
diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py
index fddcc2720..25e7978c7 100644
--- a/vllm/utils/__init__.py
+++ b/vllm/utils/__init__.py
@@ -7,8 +7,6 @@ from typing import Any
 
 import torch
 
-from vllm.logger import init_logger
-
 _DEPRECATED_MAPPINGS = {
     "cprofile": "profiling",
     "cprofile_context": "profiling",
@@ -37,21 +35,6 @@ def __dir__() -> list[str]:
     return sorted(list(globals().keys()) + list(_DEPRECATED_MAPPINGS.keys()))
 
 
-logger = init_logger(__name__)
-
-# Constants related to forcing the attention backend selection
-
-# String name of register which may be set in order to
-# force auto-selection of attention backend by Attention
-# wrapper
-STR_BACKEND_ENV_VAR: str = "VLLM_ATTENTION_BACKEND"
-
-# Possible string values of STR_BACKEND_ENV_VAR
-# register, corresponding to possible backends
-STR_FLASHINFER_ATTN_VAL: str = "FLASHINFER"
-STR_FLASH_ATTN_VAL: str = "FLASH_ATTN"
-STR_INVALID_VAL: str = "INVALID"
-
 MASK_64_BITS = (1 << 64) - 1