From 03089019759b4e9af69c160ad0c516e3dcdb1420 Mon Sep 17 00:00:00 2001 From: Matthew Bonanni Date: Fri, 9 Jan 2026 19:27:15 -0500 Subject: [PATCH] [2/N][Attention] Fix pre-commit errors (#32052) Signed-off-by: Matthew Bonanni --- tools/pre_commit/mypy.py | 2 -- vllm/v1/attention/backends/fa_utils.py | 12 ++++-------- vllm/v1/attention/ops/paged_attn.py | 8 ++------ 3 files changed, 6 insertions(+), 16 deletions(-) diff --git a/tools/pre_commit/mypy.py b/tools/pre_commit/mypy.py index 4b7f85077..48803930d 100755 --- a/tools/pre_commit/mypy.py +++ b/tools/pre_commit/mypy.py @@ -74,8 +74,6 @@ EXCLUDE = [ "vllm/model_executor/layers/fla/ops", # Ignore triton kernels in ops. "vllm/v1/attention/ops", - # TODO(matt): remove. - "vllm/v1/attention/backends/fa_utils.py", ] diff --git a/vllm/v1/attention/backends/fa_utils.py b/vllm/v1/attention/backends/fa_utils.py index 95148e881..eb902afd0 100644 --- a/vllm/v1/attention/backends/fa_utils.py +++ b/vllm/v1/attention/backends/fa_utils.py @@ -7,10 +7,7 @@ from vllm.platforms import current_platform logger = init_logger(__name__) if current_platform.is_cuda(): - from vllm import _custom_ops - - ops = _custom_ops - reshape_and_cache_flash = ops.reshape_and_cache_flash + from vllm._custom_ops import reshape_and_cache_flash from vllm.vllm_flash_attn import ( # type: ignore[attr-defined] flash_attn_varlen_func, get_scheduler_metadata, @@ -19,10 +16,9 @@ if current_platform.is_cuda(): elif current_platform.is_xpu(): from vllm._ipex_ops import ipex_ops - ops = ipex_ops - reshape_and_cache_flash = ops.reshape_and_cache_flash - flash_attn_varlen_func = ops.flash_attn_varlen_func - get_scheduler_metadata = ops.get_scheduler_metadata + reshape_and_cache_flash = ipex_ops.reshape_and_cache_flash + flash_attn_varlen_func = ipex_ops.flash_attn_varlen_func + get_scheduler_metadata = ipex_ops.get_scheduler_metadata elif current_platform.is_rocm(): try: diff --git a/vllm/v1/attention/ops/paged_attn.py b/vllm/v1/attention/ops/paged_attn.py index 280629548..73995fc93 100644 --- a/vllm/v1/attention/ops/paged_attn.py +++ b/vllm/v1/attention/ops/paged_attn.py @@ -7,13 +7,9 @@ import torch from vllm.platforms import current_platform if current_platform.is_cuda_alike(): - from vllm import _custom_ops - - ops = _custom_ops + from vllm import _custom_ops as ops elif current_platform.is_xpu(): - from vllm._ipex_ops import ipex_ops - - ops = ipex_ops + from vllm._ipex_ops import ipex_ops as ops # type: ignore[no-redef] class PagedAttention: