diff --git a/vllm/compilation/passes/fusion/sequence_parallelism.py b/vllm/compilation/passes/fusion/sequence_parallelism.py
index 63de85932..b7ae3dc62 100644
--- a/vllm/compilation/passes/fusion/sequence_parallelism.py
+++ b/vllm/compilation/passes/fusion/sequence_parallelism.py
@@ -18,7 +18,6 @@ from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
     kFp8StaticTensorSym,
 )
-from vllm.platforms import current_platform
 
 from ..inductor_pass import enable_fake_mode
 from ..utility.noop_elimination import NoOpEliminationPass
@@ -215,9 +214,6 @@ class MiddleAllReduceRMSNormPattern(_SequenceParallelPatternHelper):
         )
 
 
-FP8_DTYPE = current_platform.fp8_dtype()
-
-
 class FirstAllReduceRMSNormStaticFP8Pattern(_SequenceParallelPatternHelper):
     def __init__(
         self,
diff --git a/vllm/config/model.py b/vllm/config/model.py
index 012b2b1c9..4e3568fa1 100644
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -461,8 +461,6 @@ class ModelConfig:
 
         self.maybe_pull_model_tokenizer_for_runai(self.model, self.tokenizer)
 
-        from vllm.platforms import current_platform
-
         if self.override_attention_dtype is not None and not current_platform.is_rocm():
             warnings.warn(
                 "override-attention-dtype is set but not using ROCm platform",
@@ -940,8 +938,6 @@ class ModelConfig:
                     f"Unknown quantization method: {self.quantization}. Must "
                    f"be one of {supported_quantization}."
                )
 
-            from vllm.platforms import current_platform
-
             current_platform.verify_quantization(self.quantization)
             if self.quantization in me_quant.DEPRECATED_QUANTIZATION_METHODS:
@@ -1811,8 +1807,6 @@ def _resolve_auto_dtype(
     *,
     is_pooling_model: bool,
 ):
-    from vllm.platforms import current_platform
-
     supported_dtypes = [
         dtype
         for dtype in current_platform.supported_dtypes
diff --git a/vllm/distributed/parallel_state.py b/vllm/distributed/parallel_state.py
index 9e6b6df08..40b797a1a 100644
--- a/vllm/distributed/parallel_state.py
+++ b/vllm/distributed/parallel_state.py
@@ -385,8 +385,6 @@ class GroupCoordinator:
                 self.cpu_group, 1 << 22, 6
             )
 
-        from vllm.platforms import current_platform
-
         self.use_custom_op_call = (
             current_platform.is_cuda_alike() or current_platform.is_tpu()
         )
diff --git a/vllm/v1/attention/backends/fa_utils.py b/vllm/v1/attention/backends/fa_utils.py
index 9658a7e3c..4039316c3 100644
--- a/vllm/v1/attention/backends/fa_utils.py
+++ b/vllm/v1/attention/backends/fa_utils.py
@@ -55,9 +55,6 @@ elif current_platform.is_rocm():
 def get_flash_attn_version(
     requires_alibi: bool = False, head_size: int | None = None
 ) -> int | None:
-    # import here to avoid circular dependencies
-    from vllm.platforms import current_platform
-
     if current_platform.is_xpu():
         return 2
     if current_platform.is_rocm():
diff --git a/vllm/v1/attention/backends/flashinfer.py b/vllm/v1/attention/backends/flashinfer.py
index 233251d07..4362bacb7 100755
--- a/vllm/v1/attention/backends/flashinfer.py
+++ b/vllm/v1/attention/backends/flashinfer.py
@@ -374,8 +374,6 @@ class FlashInferBackend(AttentionBackend):
 
     @classmethod
     def get_required_kv_cache_layout(cls) -> KVCacheLayoutType | None:
-        from vllm.platforms import current_platform
-
         capability = current_platform.get_device_capability()
         if capability is not None and capability.major == 10:
             return "HND"
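
Note (not part of the patch): every hunk above applies the same pattern, dropping a deferred `from vllm.platforms import current_platform` from a function or method body so the call sites rely on a module-level import instead. Below is a minimal sketch of that pattern, assuming the affected module already has (or gains elsewhere in this change) the top-level import; the function names are illustrative, not taken from the patch.

from vllm.platforms import current_platform  # module-level import the call sites now rely on


def kv_cache_layout_with_deferred_import() -> str | None:
    # Before: the import was deferred into the function body (historically to
    # avoid circular imports during module initialization).
    from vllm.platforms import current_platform

    capability = current_platform.get_device_capability()
    return "HND" if capability is not None and capability.major == 10 else None


def kv_cache_layout_with_module_import() -> str | None:
    # After: the body uses the module-level import directly.
    capability = current_platform.get_device_capability()
    return "HND" if capability is not None and capability.major == 10 else None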