From f83b933b84b85ee54121575fc347881b35090616 Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Tue, 10 Mar 2026 16:18:28 +0000
Subject: [PATCH] [CI] Bump `mypy` version to 1.19.1 (#36104)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 .pre-commit-config.yaml | 2 +-
 tests/quantization/test_mixed_precision.py | 1 +
 .../device_communicators/shm_broadcast.py | 3 ++
 .../shm_object_storage.py | 2 ++
 .../kv_transfer/kv_connector/utils.py | 1 +
 vllm/distributed/parallel_state.py | 2 ++
 vllm/lora/layers/base.py | 14 +++++++-
 vllm/renderers/hf.py | 24 ++++++++++++-
 vllm/sampling_params.py | 1 +
 .../configs/funaudiochat.py | 34 ++++++++-----------
 vllm/transformers_utils/configs/kimi_k25.py | 14 ++++----
 vllm/transformers_utils/processors/ovis2_5.py | 1 +
 vllm/v1/engine/detokenizer.py | 8 ++---
 vllm/v1/executor/ray_executor.py | 4 +--
 14 files changed, 76 insertions(+), 35 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 5585b55fd..a40068708 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -55,7 +55,7 @@ repos:
       language: python
       types_or: [python, pyi]
       require_serial: true
-      additional_dependencies: ["mypy[faster-cache]==1.15.0", regex, types-cachetools, types-setuptools, types-PyYAML, types-requests, types-torch, pydantic]
+      additional_dependencies: ["mypy[faster-cache]==1.19.1", regex, types-cachetools, types-setuptools, types-PyYAML, types-requests, types-torch, pydantic]
   - id: mypy-3.10 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
     name: Run mypy for Python 3.10
     entry: python tools/pre_commit/mypy.py 1 "3.10"
diff --git a/tests/quantization/test_mixed_precision.py b/tests/quantization/test_mixed_precision.py
index 51526470b..5087f9049 100755
--- a/tests/quantization/test_mixed_precision.py
+++ b/tests/quantization/test_mixed_precision.py
@@ -8,6 +8,7 @@ Run `pytest tests/quantization/test_mixed_precision.py`.
 
 import importlib
 import importlib.metadata
+import importlib.util
 from dataclasses import dataclass
 
 import lm_eval
diff --git a/vllm/distributed/device_communicators/shm_broadcast.py b/vllm/distributed/device_communicators/shm_broadcast.py
index 1c5c4e01d..9c8bf3ad1 100644
--- a/vllm/distributed/device_communicators/shm_broadcast.py
+++ b/vllm/distributed/device_communicators/shm_broadcast.py
@@ -274,6 +274,7 @@ class ShmRingBuffer:
             self.shared_memory = shared_memory.SharedMemory(
                 create=True, size=self.total_bytes_of_buffer
             )
+            assert self.shared_memory.buf is not None, "Buffer was not created"
             # initialize the metadata section to 0
             with self.shared_memory.buf[self.metadata_offset :] as metadata_buffer:
                 torch.frombuffer(metadata_buffer, dtype=torch.uint8).fill_(0)
@@ -325,6 +326,7 @@ class ShmRingBuffer:
     def get_data(self, current_idx: int):
         start = self.data_offset + current_idx * self.max_chunk_bytes
         end = start + self.max_chunk_bytes
+        assert self.shared_memory.buf is not None, "Buffer has been closed"
         with self.shared_memory.buf[start:end] as buf:
             yield buf
 
@@ -332,6 +334,7 @@ class ShmRingBuffer:
     def get_metadata(self, current_idx: int):
         start = self.metadata_offset + current_idx * self.metadata_size
         end = start + self.metadata_size
+        assert self.shared_memory.buf is not None, "Buffer has been closed"
         with self.shared_memory.buf[start:end] as buf:
             yield buf
diff --git a/vllm/distributed/device_communicators/shm_object_storage.py b/vllm/distributed/device_communicators/shm_object_storage.py
index 3d6048052..e2d2b2483 100644
--- a/vllm/distributed/device_communicators/shm_object_storage.py
+++ b/vllm/distributed/device_communicators/shm_object_storage.py
@@ -197,6 +197,7 @@ class SingleWriterShmRingBuffer:
         """
         assert self.is_writer, "Only the writer can allocate buffers."
         assert size > 0, "Size must be greater than 0"
+        assert self.shared_memory.buf is not None, "Buffer has been closed"
         size += self.MD_SIZE  # add metadata size to the buffer size
         # reset to beginning if the buffer does not have enough contiguous space
         buffer_end_reset = self.data_buffer_end % self.data_buffer_size
@@ -239,6 +240,7 @@ class SingleWriterShmRingBuffer:
 
     @contextmanager
     def access_buf(self, address: int):
+        assert self.shared_memory.buf is not None, "Buffer has been closed"
         buf_idx = address % self.data_buffer_size
 
         # read metadata
diff --git a/vllm/distributed/kv_transfer/kv_connector/utils.py b/vllm/distributed/kv_transfer/kv_connector/utils.py
index 6e0366c52..319e5d76c 100644
--- a/vllm/distributed/kv_transfer/kv_connector/utils.py
+++ b/vllm/distributed/kv_transfer/kv_connector/utils.py
@@ -351,6 +351,7 @@ class TpKVTopology:
                 include_num_layers_dimension=self._cross_layers_blocks
             )
         except (AttributeError, NotImplementedError):
+            assert self.tensor_shape is not None
             kv_cache_stride_order = tuple(range(len(self.tensor_shape)))
 
         # In case of cross layers permute kv_cache_shape according to
diff --git a/vllm/distributed/parallel_state.py b/vllm/distributed/parallel_state.py
index fe48a6006..af1bc6b14 100644
--- a/vllm/distributed/parallel_state.py
+++ b/vllm/distributed/parallel_state.py
@@ -1964,6 +1964,7 @@ def in_the_same_node_as(
             if rank == source_rank:
                 # create a shared memory segment
                 shm = shared_memory.SharedMemory(create=True, size=128)
+                assert shm.buf is not None, "Buffer was not created"
                 shm.buf[: len(magic_message)] = magic_message
                 if isinstance(pg, ProcessGroup):
                     torch.distributed.broadcast_object_list(
@@ -1990,6 +1991,7 @@ def in_the_same_node_as(
                     lambda *args, **kwargs: None,
                 ):
                     shm = shared_memory.SharedMemory(name=name)
+                    assert shm.buf is not None, "Buffer was not opened"
                     if shm.buf[: len(magic_message)] == magic_message:
                         is_in_the_same_node[rank] = 1
     except Exception as e:
diff --git a/vllm/lora/layers/base.py b/vllm/lora/layers/base.py
index a4b8fb4d2..26d2fb46d 100644
--- a/vllm/lora/layers/base.py
+++ b/vllm/lora/layers/base.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, overload
 
 import torch
 import torch.nn as nn
@@ -14,12 +14,24 @@ if TYPE_CHECKING:
 
 
 class BaseLayerWithLoRA(nn.Module):
+    @overload
+    def slice_lora_a(
+        self, lora_a: list[torch.Tensor | None]
+    ) -> list[torch.Tensor | None]: ...
+    @overload
+    def slice_lora_a(self, lora_a: torch.Tensor) -> torch.Tensor: ...
     def slice_lora_a(
         self, lora_a: torch.Tensor | list[torch.Tensor | None]
     ) -> torch.Tensor | list[torch.Tensor | None]:
         """Slice lora a if splitting for tensor parallelism."""
         ...
 
+    @overload
+    def slice_lora_b(
+        self, lora_b: list[torch.Tensor | None]
+    ) -> list[torch.Tensor | None]: ...
+    @overload
+    def slice_lora_b(self, lora_b: torch.Tensor) -> torch.Tensor: ...
     def slice_lora_b(
         self, lora_b: torch.Tensor | list[torch.Tensor | None]
     ) -> torch.Tensor | list[torch.Tensor | None]:
diff --git a/vllm/renderers/hf.py b/vllm/renderers/hf.py
index c862f70aa..97d15ec62 100644
--- a/vllm/renderers/hf.py
+++ b/vllm/renderers/hf.py
@@ -5,7 +5,7 @@ import itertools
 from collections import defaultdict, deque
 from collections.abc import Set
 from functools import lru_cache
-from typing import TYPE_CHECKING, Any, cast
+from typing import TYPE_CHECKING, Any, Literal, cast, overload
 
 import jinja2
 import jinja2.ext
@@ -439,6 +439,28 @@ def resolve_chat_template_kwargs(
     return {k: v for k, v in chat_template_kwargs.items() if k in accept_vars}
 
 
+@overload
+def safe_apply_chat_template(
+    model_config: "ModelConfig",
+    tokenizer: HfTokenizer,
+    conversation: list[ConversationMessage],
+    *,
+    tools: list[dict[str, Any]] | None = ...,
+    chat_template: str | None = ...,
+    tokenize: Literal[True] = ...,
+    **kwargs,
+) -> list[int]: ...
+@overload
+def safe_apply_chat_template(
+    model_config: "ModelConfig",
+    tokenizer: HfTokenizer,
+    conversation: list[ConversationMessage],
+    *,
+    tools: list[dict[str, Any]] | None = ...,
+    chat_template: str | None = ...,
+    tokenize: Literal[False] = ...,
+    **kwargs,
+) -> str: ...
 def safe_apply_chat_template(
     model_config: "ModelConfig",
     tokenizer: HfTokenizer,
diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py
index 580dbb6ec..f7a2e8b3f 100644
--- a/vllm/sampling_params.py
+++ b/vllm/sampling_params.py
@@ -533,6 +533,7 @@ class SamplingParams(
         if eos_ids:
             self._all_stop_token_ids.update(eos_ids)
             if not self.ignore_eos:
+                assert self.stop_token_ids is not None
                 eos_ids.update(self.stop_token_ids)
                 self.stop_token_ids = list(eos_ids)
 
diff --git a/vllm/transformers_utils/configs/funaudiochat.py b/vllm/transformers_utils/configs/funaudiochat.py
index 04505b273..36a446860 100644
--- a/vllm/transformers_utils/configs/funaudiochat.py
+++ b/vllm/transformers_utils/configs/funaudiochat.py
@@ -3,7 +3,7 @@
 
 from __future__ import annotations
 
-from transformers import PretrainedConfig
+from transformers import CONFIG_MAPPING, PretrainedConfig
 
 # NOTE: Temporary shim for FunAudioChat checkpoints.
 # These checkpoints use `model_type="funaudiochat"`, which is not currently
@@ -92,28 +92,24 @@ class FunAudioChatConfig(PretrainedConfig):
         self.audio_token_index = audio_token_index
         self.ignore_index = ignore_index
 
-        if isinstance(audio_config, dict):
-            audio_config.setdefault(
-                "model_type", FunAudioChatAudioEncoderConfig.model_type
-            )
-            audio_config = FunAudioChatAudioEncoderConfig(**audio_config)
-        elif audio_config is None:
-            audio_config = FunAudioChatAudioEncoderConfig()
-        self.audio_config = audio_config
+        if audio_config is None:
+            self.audio_config = FunAudioChatAudioEncoderConfig()
+        elif isinstance(audio_config, dict):
+            default_model_type = FunAudioChatAudioEncoderConfig.model_type
+            audio_config.setdefault("model_type", default_model_type)
+            self.audio_config = FunAudioChatAudioEncoderConfig(**audio_config)
+        else:
+            self.audio_config = audio_config
 
-        if isinstance(text_config, dict):
+        if text_config is None:
+            self.text_config = CONFIG_MAPPING["qwen2"]()
+        elif isinstance(text_config, dict):
             # Default to qwen2 for backwards compatibility; FunAudioChat uses
             # qwen3 in practice for recent checkpoints.
             text_config.setdefault("model_type", "qwen2")
-            import transformers
-
-            text_cls = transformers.CONFIG_MAPPING[text_config["model_type"]]
-            text_config = text_cls(**text_config)
-        elif text_config is None:
-            import transformers
-
-            text_config = transformers.CONFIG_MAPPING["qwen2"]()
-        self.text_config = text_config
+            self.text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config)
+        else:
+            self.text_config = text_config
 
         self.hidden_size = (
             int(self.text_config.hidden_size)
diff --git a/vllm/transformers_utils/configs/kimi_k25.py b/vllm/transformers_utils/configs/kimi_k25.py
index 72f67251d..710e9b563 100644
--- a/vllm/transformers_utils/configs/kimi_k25.py
+++ b/vllm/transformers_utils/configs/kimi_k25.py
@@ -90,17 +90,19 @@ class KimiK25Config(PretrainedConfig):
     ):
         # Vision config
         if vision_config is None:
-            vision_config = KimiK25VisionConfig()
+            self.vision_config = KimiK25VisionConfig()
         elif isinstance(vision_config, dict):
-            vision_config = KimiK25VisionConfig(**vision_config)
-        self.vision_config: KimiK25VisionConfig = vision_config
+            self.vision_config = KimiK25VisionConfig(**vision_config)
+        else:
+            self.vision_config = vision_config
 
         # Text config
         if text_config is None:
-            text_config = DeepseekV3Config()
+            self.text_config = DeepseekV3Config()
         elif isinstance(text_config, dict):
-            text_config = DeepseekV3Config(**text_config)
-        self.text_config: DeepseekV3Config = text_config
+            self.text_config = DeepseekV3Config(**text_config)
+        else:
+            self.text_config = text_config
 
         # Set mm_hidden_size to text hidden size if not explicitly set
         if self.vision_config.mm_hidden_size == self.vision_config.hidden_size:
diff --git a/vllm/transformers_utils/processors/ovis2_5.py b/vllm/transformers_utils/processors/ovis2_5.py
index 46ffd6a1e..11ac0360e 100644
--- a/vllm/transformers_utils/processors/ovis2_5.py
+++ b/vllm/transformers_utils/processors/ovis2_5.py
@@ -412,6 +412,7 @@ class Ovis2_5Processor(ProcessorMixin):
             images = video
         else:
             raise ValueError("Either images or video should be provided.")
+        assert images is not None
         min_pixels = min(
             max_pixels if max_pixels is not None else MAX_PIXELS,
             min_pixels if min_pixels is not None else MIN_PIXELS,
diff --git a/vllm/v1/engine/detokenizer.py b/vllm/v1/engine/detokenizer.py
index da950c2a0..2f81ba4f6 100644
--- a/vllm/v1/engine/detokenizer.py
+++ b/vllm/v1/engine/detokenizer.py
@@ -72,14 +72,12 @@ class BaseIncrementalDetokenizer(IncrementalDetokenizer, ABC):
         # Stop strings
         params = request.sampling_params
         assert params is not None
-        stop_list: list[str]
         if params.stop is None:
-            stop_list = []
+            self.stop = []
         elif isinstance(params.stop, str):
-            stop_list = [params.stop]
+            self.stop = [params.stop]
         else:
-            stop_list = params.stop
-        self.stop = stop_list
+            self.stop = params.stop
         self.min_tokens = params.min_tokens
         self.include_stop_str_in_output = params.include_stop_str_in_output
 
diff --git a/vllm/v1/executor/ray_executor.py b/vllm/v1/executor/ray_executor.py
index 2e35faae8..1cbc11990 100644
--- a/vllm/v1/executor/ray_executor.py
+++ b/vllm/v1/executor/ray_executor.py
@@ -282,8 +282,8 @@ class RayDistributedExecutor(Executor):
                 # driver_dummy_worker can be None when using ray spmd worker.
                 continue
             worker_node_and_gpu_ids.append(
-                ray.get(worker.get_node_and_gpu_ids.remote())
-            )  # type: ignore[attr-defined]
+                ray.get(worker.get_node_and_gpu_ids.remote())  # type: ignore[attr-defined]
+            )
 
         node_workers = defaultdict(list)  # node id -> list of worker ranks
         node_gpus = defaultdict(list)  # node id -> list of gpu ids
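
A few notes on the typing patterns this bump forces. The recurring `assert ... is not None` hunks exist because the stubs used by mypy 1.19 evidently treat `SharedMemory.buf` (and the other values touched above) as Optional, so an explicit assert is needed to narrow the type before use. A minimal self-contained sketch of the pattern, using a hypothetical `write_magic` helper rather than any of vLLM's actual functions:

```python
from multiprocessing import shared_memory


def write_magic(name: str, magic: bytes) -> None:
    """Write `magic` at the start of an existing shared memory segment."""
    shm = shared_memory.SharedMemory(name=name)
    try:
        # `shm.buf` is typed as Optional in newer stubs; without this assert,
        # strict mypy rejects the slice assignment below.
        assert shm.buf is not None, "Buffer has been closed"
        shm.buf[: len(magic)] = magic
    finally:
        shm.close()
```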
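The `slice_lora_a`/`slice_lora_b` hunks use `typing.overload` so that a tensor in gives a tensor out and a list in gives a list out, instead of the full union. A reduced sketch of the same idiom, with a stand-in `Tensor` class (hypothetical, only so the example carries no torch dependency):

```python
from typing import overload


class Tensor:
    """Stand-in for torch.Tensor; only the types matter in this sketch."""


class BaseLayer:
    @overload
    def slice_lora_a(
        self, lora_a: list[Tensor | None]
    ) -> list[Tensor | None]: ...
    @overload
    def slice_lora_a(self, lora_a: Tensor) -> Tensor: ...
    def slice_lora_a(
        self, lora_a: Tensor | list[Tensor | None]
    ) -> Tensor | list[Tensor | None]:
        # Base class is a no-op; subclasses shard for tensor parallelism.
        return lora_a
```

With the overloads in place, `layer.slice_lora_a(t)` type-checks as `Tensor`, so callers no longer need casts or isinstance checks to recover the precise type.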
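`safe_apply_chat_template` gets the same treatment via `Literal` overloads: `tokenize=True` returns token ids, `tokenize=False` returns the rendered string. A toy sketch of the idiom (the `render` function and its character-level "tokenizer" are invented for illustration, not vLLM's implementation):

```python
from typing import Literal, overload


@overload
def render(prompt: str, *, tokenize: Literal[True] = ...) -> list[int]: ...
@overload
def render(prompt: str, *, tokenize: Literal[False]) -> str: ...
def render(prompt: str, *, tokenize: bool = True) -> list[int] | str:
    if tokenize:
        return [ord(ch) for ch in prompt]  # toy tokenizer: one id per char
    return prompt


ids: list[int] = render("hi")              # resolves to the list[int] overload
text: str = render("hi", tokenize=False)   # resolves to the str overload
```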
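Finally, the funaudiochat and kimi_k25 rewrites normalize a `None` / dict / instance argument by assigning each branch directly to the attribute, rather than re-binding the parameter and assigning once at the end; this lets mypy infer a single precise type for the attribute. The shape of the pattern, with hypothetical `SubConfig`/`TopConfig` names:

```python
from typing import Any


class SubConfig:
    def __init__(self, **kwargs: Any) -> None:
        self.__dict__.update(kwargs)


class TopConfig:
    def __init__(self, sub_config: SubConfig | dict[str, Any] | None = None) -> None:
        if sub_config is None:
            self.sub_config = SubConfig()              # default-construct
        elif isinstance(sub_config, dict):
            self.sub_config = SubConfig(**sub_config)  # hydrate from a dict
        else:
            self.sub_config = sub_config               # already an instance
```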