[CI] Bump mypy version to 1.19.1 (#36104)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2026-03-10 16:18:28 +00:00
parent 82f3f30e26
commit f83b933b84
14 changed files with 76 additions and 35 deletions
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -55,7 +55,7 @@ repos:
      language: python
      types_or: [python, pyi]
      require_serial: true
-      additional_dependencies: ["mypy[faster-cache]==1.15.0", regex, types-cachetools, types-setuptools, types-PyYAML, types-requests, types-torch, pydantic]
+      additional_dependencies: ["mypy[faster-cache]==1.19.1", regex, types-cachetools, types-setuptools, types-PyYAML, types-requests, types-torch, pydantic]
  - id: mypy-3.10 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
    name: Run mypy for Python 3.10
    entry: python tools/pre_commit/mypy.py 1 "3.10"
--- a/tests/quantization/test_mixed_precision.py
+++ b/tests/quantization/test_mixed_precision.py
@@ -8,6 +8,7 @@ Run `pytest tests/quantization/test_mixed_precision.py`.

 import importlib
 import importlib.metadata
+import importlib.util
 from dataclasses import dataclass

 import lm_eval
--- a/vllm/distributed/device_communicators/shm_broadcast.py
+++ b/vllm/distributed/device_communicators/shm_broadcast.py
@@ -274,6 +274,7 @@ class ShmRingBuffer:
            self.shared_memory = shared_memory.SharedMemory(
                create=True, size=self.total_bytes_of_buffer
            )
+            assert self.shared_memory.buf is not None, "Buffer was not created"
            # initialize the metadata section to 0
            with self.shared_memory.buf[self.metadata_offset :] as metadata_buffer:
                torch.frombuffer(metadata_buffer, dtype=torch.uint8).fill_(0)
@@ -325,6 +326,7 @@ class ShmRingBuffer:
    def get_data(self, current_idx: int):
        start = self.data_offset + current_idx * self.max_chunk_bytes
        end = start + self.max_chunk_bytes
+        assert self.shared_memory.buf is not None, "Buffer has been closed"
        with self.shared_memory.buf[start:end] as buf:
            yield buf

@@ -332,6 +334,7 @@ class ShmRingBuffer:
    def get_metadata(self, current_idx: int):
        start = self.metadata_offset + current_idx * self.metadata_size
        end = start + self.metadata_size
+        assert self.shared_memory.buf is not None, "Buffer has been closed"
        with self.shared_memory.buf[start:end] as buf:
            yield buf

--- a/vllm/distributed/device_communicators/shm_object_storage.py
+++ b/vllm/distributed/device_communicators/shm_object_storage.py
@@ -197,6 +197,7 @@ class SingleWriterShmRingBuffer:
        """
        assert self.is_writer, "Only the writer can allocate buffers."
        assert size > 0, "Size must be greater than 0"
+        assert self.shared_memory.buf is not None, "Buffer has been closed"
        size += self.MD_SIZE  # add metadata size to the buffer size
         # reset to beginning if the buffer does not have enough contiguous space
        buffer_end_reset = self.data_buffer_end % self.data_buffer_size
@@ -239,6 +240,7 @@ class SingleWriterShmRingBuffer:

    @contextmanager
    def access_buf(self, address: int):
+        assert self.shared_memory.buf is not None, "Buffer has been closed"
        buf_idx = address % self.data_buffer_size

        # read metadata
--- a/vllm/distributed/kv_transfer/kv_connector/utils.py
+++ b/vllm/distributed/kv_transfer/kv_connector/utils.py
@@ -351,6 +351,7 @@ class TpKVTopology:
                    include_num_layers_dimension=self._cross_layers_blocks
                )
            except (AttributeError, NotImplementedError):
+                assert self.tensor_shape is not None
                kv_cache_stride_order = tuple(range(len(self.tensor_shape)))

            # In case of cross layers permute kv_cache_shape according to
--- a/vllm/distributed/parallel_state.py
+++ b/vllm/distributed/parallel_state.py
@@ -1964,6 +1964,7 @@ def in_the_same_node_as(
            if rank == source_rank:
                # create a shared memory segment
                shm = shared_memory.SharedMemory(create=True, size=128)
+                assert shm.buf is not None, "Buffer was not created"
                shm.buf[: len(magic_message)] = magic_message
                if isinstance(pg, ProcessGroup):
                    torch.distributed.broadcast_object_list(
@@ -1990,6 +1991,7 @@ def in_the_same_node_as(
                    lambda *args, **kwargs: None,
                ):
                    shm = shared_memory.SharedMemory(name=name)
+                assert shm.buf is not None, "Buffer was not opened"
                if shm.buf[: len(magic_message)] == magic_message:
                    is_in_the_same_node[rank] = 1
    except Exception as e:
--- a/vllm/lora/layers/base.py
+++ b/vllm/lora/layers/base.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, overload

 import torch
 import torch.nn as nn
@@ -14,12 +14,24 @@ if TYPE_CHECKING:


 class BaseLayerWithLoRA(nn.Module):
+    @overload
+    def slice_lora_a(
+        self, lora_a: list[torch.Tensor | None]
+    ) -> list[torch.Tensor | None]: ...
+    @overload
+    def slice_lora_a(self, lora_a: torch.Tensor) -> torch.Tensor: ...
    def slice_lora_a(
        self, lora_a: torch.Tensor | list[torch.Tensor | None]
    ) -> torch.Tensor | list[torch.Tensor | None]:
        """Slice lora a if splitting for tensor parallelism."""
        ...

+    @overload
+    def slice_lora_b(
+        self, lora_b: list[torch.Tensor | None]
+    ) -> list[torch.Tensor | None]: ...
+    @overload
+    def slice_lora_b(self, lora_b: torch.Tensor) -> torch.Tensor: ...
    def slice_lora_b(
        self, lora_b: torch.Tensor | list[torch.Tensor | None]
    ) -> torch.Tensor | list[torch.Tensor | None]:
--- a/vllm/renderers/hf.py
+++ b/vllm/renderers/hf.py
@@ -5,7 +5,7 @@ import itertools
 from collections import defaultdict, deque
 from collections.abc import Set
 from functools import lru_cache
-from typing import TYPE_CHECKING, Any, cast
+from typing import TYPE_CHECKING, Any, Literal, cast, overload

 import jinja2
 import jinja2.ext
@@ -439,6 +439,28 @@ def resolve_chat_template_kwargs(
    return {k: v for k, v in chat_template_kwargs.items() if k in accept_vars}


+@overload
+def safe_apply_chat_template(
+    model_config: "ModelConfig",
+    tokenizer: HfTokenizer,
+    conversation: list[ConversationMessage],
+    *,
+    tools: list[dict[str, Any]] | None = ...,
+    chat_template: str | None = ...,
+    tokenize: Literal[True] = ...,
+    **kwargs,
+) -> list[int]: ...
+@overload
+def safe_apply_chat_template(
+    model_config: "ModelConfig",
+    tokenizer: HfTokenizer,
+    conversation: list[ConversationMessage],
+    *,
+    tools: list[dict[str, Any]] | None = ...,
+    chat_template: str | None = ...,
+    tokenize: Literal[False] = ...,
+    **kwargs,
+) -> str: ...
 def safe_apply_chat_template(
    model_config: "ModelConfig",
    tokenizer: HfTokenizer,
--- a/vllm/sampling_params.py
+++ b/vllm/sampling_params.py
@@ -533,6 +533,7 @@ class SamplingParams(
            if eos_ids:
                self._all_stop_token_ids.update(eos_ids)
                if not self.ignore_eos:
+                    assert self.stop_token_ids is not None
                    eos_ids.update(self.stop_token_ids)
                    self.stop_token_ids = list(eos_ids)

--- a/vllm/transformers_utils/configs/funaudiochat.py
+++ b/vllm/transformers_utils/configs/funaudiochat.py
@@ -3,7 +3,7 @@

 from __future__ import annotations

-from transformers import PretrainedConfig
+from transformers import CONFIG_MAPPING, PretrainedConfig

 # NOTE: Temporary shim for FunAudioChat checkpoints.
 # These checkpoints use `model_type="funaudiochat"`, which is not currently
@@ -92,27 +92,23 @@ class FunAudioChatConfig(PretrainedConfig):
        self.audio_token_index = audio_token_index
        self.ignore_index = ignore_index

-        if isinstance(audio_config, dict):
-            audio_config.setdefault(
-                "model_type", FunAudioChatAudioEncoderConfig.model_type
-            )
-            audio_config = FunAudioChatAudioEncoderConfig(**audio_config)
-        elif audio_config is None:
-            audio_config = FunAudioChatAudioEncoderConfig()
+        if audio_config is None:
+            self.audio_config = FunAudioChatAudioEncoderConfig()
+        elif isinstance(audio_config, dict):
+            default_model_type = FunAudioChatAudioEncoderConfig.model_type
+            audio_config.setdefault("model_type", default_model_type)
+            self.audio_config = FunAudioChatAudioEncoderConfig(**audio_config)
+        else:
            self.audio_config = audio_config

-        if isinstance(text_config, dict):
+        if text_config is None:
+            self.text_config = CONFIG_MAPPING["qwen2"]()
+        elif isinstance(text_config, dict):
            # Default to qwen2 for backwards compatibility; FunAudioChat uses
            # qwen3 in practice for recent checkpoints.
            text_config.setdefault("model_type", "qwen2")
-            import transformers
-
-            text_cls = transformers.CONFIG_MAPPING[text_config["model_type"]]
-            text_config = text_cls(**text_config)
-        elif text_config is None:
-            import transformers
-
-            text_config = transformers.CONFIG_MAPPING["qwen2"]()
+            self.text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config)
+        else:
            self.text_config = text_config

        self.hidden_size = (
--- a/vllm/transformers_utils/configs/kimi_k25.py
+++ b/vllm/transformers_utils/configs/kimi_k25.py
@@ -90,17 +90,19 @@ class KimiK25Config(PretrainedConfig):
    ):
        # Vision config
        if vision_config is None:
-            vision_config = KimiK25VisionConfig()
+            self.vision_config = KimiK25VisionConfig()
        elif isinstance(vision_config, dict):
-            vision_config = KimiK25VisionConfig(**vision_config)
-        self.vision_config: KimiK25VisionConfig = vision_config
+            self.vision_config = KimiK25VisionConfig(**vision_config)
+        else:
+            self.vision_config = vision_config

        # Text config
        if text_config is None:
-            text_config = DeepseekV3Config()
+            self.text_config = DeepseekV3Config()
        elif isinstance(text_config, dict):
-            text_config = DeepseekV3Config(**text_config)
-        self.text_config: DeepseekV3Config = text_config
+            self.text_config = DeepseekV3Config(**text_config)
+        else:
+            self.text_config = text_config

        # Set mm_hidden_size to text hidden size if not explicitly set
        if self.vision_config.mm_hidden_size == self.vision_config.hidden_size:
--- a/vllm/transformers_utils/processors/ovis2_5.py
+++ b/vllm/transformers_utils/processors/ovis2_5.py
@@ -412,6 +412,7 @@ class Ovis2_5Processor(ProcessorMixin):
                images = video
        else:
            raise ValueError("Either images or video should be provided.")
+        assert images is not None
        min_pixels = min(
            max_pixels if max_pixels is not None else MAX_PIXELS,
            min_pixels if min_pixels is not None else MIN_PIXELS,
--- a/vllm/v1/engine/detokenizer.py
+++ b/vllm/v1/engine/detokenizer.py
@@ -72,14 +72,12 @@ class BaseIncrementalDetokenizer(IncrementalDetokenizer, ABC):
        # Stop strings
        params = request.sampling_params
        assert params is not None
-        stop_list: list[str]
        if params.stop is None:
-            stop_list = []
+            self.stop = []
        elif isinstance(params.stop, str):
-            stop_list = [params.stop]
+            self.stop = [params.stop]
        else:
-            stop_list = params.stop
-        self.stop = stop_list
+            self.stop = params.stop
        self.min_tokens = params.min_tokens
        self.include_stop_str_in_output = params.include_stop_str_in_output

--- a/vllm/v1/executor/ray_executor.py
+++ b/vllm/v1/executor/ray_executor.py
@@ -282,8 +282,8 @@ class RayDistributedExecutor(Executor):
                # driver_dummy_worker can be None when using ray spmd worker.
                continue
            worker_node_and_gpu_ids.append(
-                ray.get(worker.get_node_and_gpu_ids.remote())
-            )  # type: ignore[attr-defined]
+                ray.get(worker.get_node_and_gpu_ids.remote())  # type: ignore[attr-defined]
+            )

        node_workers = defaultdict(list)  # node id -> list of worker ranks
        node_gpus = defaultdict(list)  # node id -> list of gpu ids