[Misc] Cleanup useless current_platform import (#35715)
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -18,7 +18,6 @@ from vllm.logger import init_logger
|
||||
from vllm.model_executor.layers.quantization.utils.quant_utils import (
|
||||
kFp8StaticTensorSym,
|
||||
)
|
||||
from vllm.platforms import current_platform
|
||||
|
||||
from ..inductor_pass import enable_fake_mode
|
||||
from ..utility.noop_elimination import NoOpEliminationPass
|
||||
@@ -215,9 +214,6 @@ class MiddleAllReduceRMSNormPattern(_SequenceParallelPatternHelper):
|
||||
)
|
||||
|
||||
|
||||
FP8_DTYPE = current_platform.fp8_dtype()
|
||||
|
||||
|
||||
class FirstAllReduceRMSNormStaticFP8Pattern(_SequenceParallelPatternHelper):
|
||||
def __init__(
|
||||
self,
|
||||
|
||||
@@ -461,8 +461,6 @@ class ModelConfig:
|
||||
|
||||
self.maybe_pull_model_tokenizer_for_runai(self.model, self.tokenizer)
|
||||
|
||||
from vllm.platforms import current_platform
|
||||
|
||||
if self.override_attention_dtype is not None and not current_platform.is_rocm():
|
||||
warnings.warn(
|
||||
"override-attention-dtype is set but not using ROCm platform",
|
||||
@@ -940,8 +938,6 @@ class ModelConfig:
|
||||
f"Unknown quantization method: {self.quantization}. Must "
|
||||
f"be one of {supported_quantization}."
|
||||
)
|
||||
from vllm.platforms import current_platform
|
||||
|
||||
current_platform.verify_quantization(self.quantization)
|
||||
|
||||
if self.quantization in me_quant.DEPRECATED_QUANTIZATION_METHODS:
|
||||
@@ -1811,8 +1807,6 @@ def _resolve_auto_dtype(
|
||||
*,
|
||||
is_pooling_model: bool,
|
||||
):
|
||||
from vllm.platforms import current_platform
|
||||
|
||||
supported_dtypes = [
|
||||
dtype
|
||||
for dtype in current_platform.supported_dtypes
|
||||
|
||||
@@ -385,8 +385,6 @@ class GroupCoordinator:
|
||||
self.cpu_group, 1 << 22, 6
|
||||
)
|
||||
|
||||
from vllm.platforms import current_platform
|
||||
|
||||
self.use_custom_op_call = (
|
||||
current_platform.is_cuda_alike() or current_platform.is_tpu()
|
||||
)
|
||||
|
||||
@@ -55,9 +55,6 @@ elif current_platform.is_rocm():
|
||||
def get_flash_attn_version(
|
||||
requires_alibi: bool = False, head_size: int | None = None
|
||||
) -> int | None:
|
||||
# import here to avoid circular dependencies
|
||||
from vllm.platforms import current_platform
|
||||
|
||||
if current_platform.is_xpu():
|
||||
return 2
|
||||
if current_platform.is_rocm():
|
||||
|
||||
@@ -374,8 +374,6 @@ class FlashInferBackend(AttentionBackend):
|
||||
|
||||
@classmethod
|
||||
def get_required_kv_cache_layout(cls) -> KVCacheLayoutType | None:
|
||||
from vllm.platforms import current_platform
|
||||
|
||||
capability = current_platform.get_device_capability()
|
||||
if capability is not None and capability.major == 10:
|
||||
return "HND"
|
||||
|
||||
Reference in New Issue
Block a user