diff --git a/tools/pre_commit/mypy.py b/tools/pre_commit/mypy.py
index 4b7f85077..48803930d 100755
--- a/tools/pre_commit/mypy.py
+++ b/tools/pre_commit/mypy.py
@@ -74,8 +74,6 @@ EXCLUDE = [
     "vllm/model_executor/layers/fla/ops",
     # Ignore triton kernels in ops.
     "vllm/v1/attention/ops",
-    # TODO(matt): remove.
-    "vllm/v1/attention/backends/fa_utils.py",
 ]
 
 
diff --git a/vllm/v1/attention/backends/fa_utils.py b/vllm/v1/attention/backends/fa_utils.py
index 95148e881..eb902afd0 100644
--- a/vllm/v1/attention/backends/fa_utils.py
+++ b/vllm/v1/attention/backends/fa_utils.py
@@ -7,10 +7,7 @@ from vllm.platforms import current_platform
 logger = init_logger(__name__)
 if current_platform.is_cuda():
-    from vllm import _custom_ops
-
-    ops = _custom_ops
-    reshape_and_cache_flash = ops.reshape_and_cache_flash
+    from vllm._custom_ops import reshape_and_cache_flash
     from vllm.vllm_flash_attn import (  # type: ignore[attr-defined]
         flash_attn_varlen_func,
         get_scheduler_metadata,
     )
@@ -19,10 +16,9 @@ if current_platform.is_cuda():
 elif current_platform.is_xpu():
     from vllm._ipex_ops import ipex_ops
 
-    ops = ipex_ops
-    reshape_and_cache_flash = ops.reshape_and_cache_flash
-    flash_attn_varlen_func = ops.flash_attn_varlen_func
-    get_scheduler_metadata = ops.get_scheduler_metadata
+    reshape_and_cache_flash = ipex_ops.reshape_and_cache_flash
+    flash_attn_varlen_func = ipex_ops.flash_attn_varlen_func
+    get_scheduler_metadata = ipex_ops.get_scheduler_metadata
 
 elif current_platform.is_rocm():
     try:
diff --git a/vllm/v1/attention/ops/paged_attn.py b/vllm/v1/attention/ops/paged_attn.py
index 280629548..73995fc93 100644
--- a/vllm/v1/attention/ops/paged_attn.py
+++ b/vllm/v1/attention/ops/paged_attn.py
@@ -7,13 +7,9 @@ import torch
 from vllm.platforms import current_platform
 
 if current_platform.is_cuda_alike():
-    from vllm import _custom_ops
-
-    ops = _custom_ops
+    from vllm import _custom_ops as ops
 elif current_platform.is_xpu():
-    from vllm._ipex_ops import ipex_ops
-
-    ops = ipex_ops
+    from vllm._ipex_ops import ipex_ops as ops  # type: ignore[no-redef]
 
 
 class PagedAttention: