[CI] Bump mypy version to 1.19.1 (#36104)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -55,7 +55,7 @@ repos:
|
||||
language: python
|
||||
types_or: [python, pyi]
|
||||
require_serial: true
|
||||
additional_dependencies: ["mypy[faster-cache]==1.15.0", regex, types-cachetools, types-setuptools, types-PyYAML, types-requests, types-torch, pydantic]
|
||||
additional_dependencies: ["mypy[faster-cache]==1.19.1", regex, types-cachetools, types-setuptools, types-PyYAML, types-requests, types-torch, pydantic]
|
||||
- id: mypy-3.10 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
|
||||
name: Run mypy for Python 3.10
|
||||
entry: python tools/pre_commit/mypy.py 1 "3.10"
|
||||
|
||||
@@ -8,6 +8,7 @@ Run `pytest tests/quantization/test_mixed_precision.py`.
|
||||
|
||||
import importlib
|
||||
import importlib.metadata
|
||||
import importlib.util
|
||||
from dataclasses import dataclass
|
||||
|
||||
import lm_eval
|
||||
|
||||
@@ -274,6 +274,7 @@ class ShmRingBuffer:
|
||||
self.shared_memory = shared_memory.SharedMemory(
|
||||
create=True, size=self.total_bytes_of_buffer
|
||||
)
|
||||
assert self.shared_memory.buf is not None, "Buffer was not created"
|
||||
# initialize the metadata section to 0
|
||||
with self.shared_memory.buf[self.metadata_offset :] as metadata_buffer:
|
||||
torch.frombuffer(metadata_buffer, dtype=torch.uint8).fill_(0)
|
||||
@@ -325,6 +326,7 @@ class ShmRingBuffer:
|
||||
def get_data(self, current_idx: int):
|
||||
start = self.data_offset + current_idx * self.max_chunk_bytes
|
||||
end = start + self.max_chunk_bytes
|
||||
assert self.shared_memory.buf is not None, "Buffer has been closed"
|
||||
with self.shared_memory.buf[start:end] as buf:
|
||||
yield buf
|
||||
|
||||
@@ -332,6 +334,7 @@ class ShmRingBuffer:
|
||||
def get_metadata(self, current_idx: int):
|
||||
start = self.metadata_offset + current_idx * self.metadata_size
|
||||
end = start + self.metadata_size
|
||||
assert self.shared_memory.buf is not None, "Buffer has been closed"
|
||||
with self.shared_memory.buf[start:end] as buf:
|
||||
yield buf
|
||||
|
||||
|
||||
@@ -197,6 +197,7 @@ class SingleWriterShmRingBuffer:
|
||||
"""
|
||||
assert self.is_writer, "Only the writer can allocate buffers."
|
||||
assert size > 0, "Size must be greater than 0"
|
||||
assert self.shared_memory.buf is not None, "Buffer has been closed"
|
||||
size += self.MD_SIZE # add metadata size to the buffer size
|
||||
# reset to beginning if the buffer does not have enough contiguous space
|
||||
buffer_end_reset = self.data_buffer_end % self.data_buffer_size
|
||||
@@ -239,6 +240,7 @@ class SingleWriterShmRingBuffer:
|
||||
|
||||
@contextmanager
|
||||
def access_buf(self, address: int):
|
||||
assert self.shared_memory.buf is not None, "Buffer has been closed"
|
||||
buf_idx = address % self.data_buffer_size
|
||||
|
||||
# read metadata
|
||||
|
||||
@@ -351,6 +351,7 @@ class TpKVTopology:
|
||||
include_num_layers_dimension=self._cross_layers_blocks
|
||||
)
|
||||
except (AttributeError, NotImplementedError):
|
||||
assert self.tensor_shape is not None
|
||||
kv_cache_stride_order = tuple(range(len(self.tensor_shape)))
|
||||
|
||||
# In case of cross layers permute kv_cache_shape according to
|
||||
|
||||
@@ -1964,6 +1964,7 @@ def in_the_same_node_as(
|
||||
if rank == source_rank:
|
||||
# create a shared memory segment
|
||||
shm = shared_memory.SharedMemory(create=True, size=128)
|
||||
assert shm.buf is not None, "Buffer was not created"
|
||||
shm.buf[: len(magic_message)] = magic_message
|
||||
if isinstance(pg, ProcessGroup):
|
||||
torch.distributed.broadcast_object_list(
|
||||
@@ -1990,6 +1991,7 @@ def in_the_same_node_as(
|
||||
lambda *args, **kwargs: None,
|
||||
):
|
||||
shm = shared_memory.SharedMemory(name=name)
|
||||
assert shm.buf is not None, "Buffer was not opened"
|
||||
if shm.buf[: len(magic_message)] == magic_message:
|
||||
is_in_the_same_node[rank] = 1
|
||||
except Exception as e:
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import TYPE_CHECKING, overload
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
@@ -14,12 +14,24 @@ if TYPE_CHECKING:
|
||||
|
||||
|
||||
class BaseLayerWithLoRA(nn.Module):
|
||||
@overload
|
||||
def slice_lora_a(
|
||||
self, lora_a: list[torch.Tensor | None]
|
||||
) -> list[torch.Tensor | None]: ...
|
||||
@overload
|
||||
def slice_lora_a(self, lora_a: torch.Tensor) -> torch.Tensor: ...
|
||||
def slice_lora_a(
|
||||
self, lora_a: torch.Tensor | list[torch.Tensor | None]
|
||||
) -> torch.Tensor | list[torch.Tensor | None]:
|
||||
"""Slice lora a if splitting for tensor parallelism."""
|
||||
...
|
||||
|
||||
@overload
|
||||
def slice_lora_b(
|
||||
self, lora_b: list[torch.Tensor | None]
|
||||
) -> list[torch.Tensor | None]: ...
|
||||
@overload
|
||||
def slice_lora_b(self, lora_b: torch.Tensor) -> torch.Tensor: ...
|
||||
def slice_lora_b(
|
||||
self, lora_b: torch.Tensor | list[torch.Tensor | None]
|
||||
) -> torch.Tensor | list[torch.Tensor | None]:
|
||||
|
||||
@@ -5,7 +5,7 @@ import itertools
|
||||
from collections import defaultdict, deque
|
||||
from collections.abc import Set
|
||||
from functools import lru_cache
|
||||
from typing import TYPE_CHECKING, Any, cast
|
||||
from typing import TYPE_CHECKING, Any, Literal, cast, overload
|
||||
|
||||
import jinja2
|
||||
import jinja2.ext
|
||||
@@ -439,6 +439,28 @@ def resolve_chat_template_kwargs(
|
||||
return {k: v for k, v in chat_template_kwargs.items() if k in accept_vars}
|
||||
|
||||
|
||||
@overload
|
||||
def safe_apply_chat_template(
|
||||
model_config: "ModelConfig",
|
||||
tokenizer: HfTokenizer,
|
||||
conversation: list[ConversationMessage],
|
||||
*,
|
||||
tools: list[dict[str, Any]] | None = ...,
|
||||
chat_template: str | None = ...,
|
||||
tokenize: Literal[True] = ...,
|
||||
**kwargs,
|
||||
) -> list[int]: ...
|
||||
@overload
|
||||
def safe_apply_chat_template(
|
||||
model_config: "ModelConfig",
|
||||
tokenizer: HfTokenizer,
|
||||
conversation: list[ConversationMessage],
|
||||
*,
|
||||
tools: list[dict[str, Any]] | None = ...,
|
||||
chat_template: str | None = ...,
|
||||
tokenize: Literal[False] = ...,
|
||||
**kwargs,
|
||||
) -> str: ...
|
||||
def safe_apply_chat_template(
|
||||
model_config: "ModelConfig",
|
||||
tokenizer: HfTokenizer,
|
||||
|
||||
@@ -533,6 +533,7 @@ class SamplingParams(
|
||||
if eos_ids:
|
||||
self._all_stop_token_ids.update(eos_ids)
|
||||
if not self.ignore_eos:
|
||||
assert self.stop_token_ids is not None
|
||||
eos_ids.update(self.stop_token_ids)
|
||||
self.stop_token_ids = list(eos_ids)
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from transformers import PretrainedConfig
|
||||
from transformers import CONFIG_MAPPING, PretrainedConfig
|
||||
|
||||
# NOTE: Temporary shim for FunAudioChat checkpoints.
|
||||
# These checkpoints use `model_type="funaudiochat"`, which is not currently
|
||||
@@ -92,28 +92,24 @@ class FunAudioChatConfig(PretrainedConfig):
|
||||
self.audio_token_index = audio_token_index
|
||||
self.ignore_index = ignore_index
|
||||
|
||||
if isinstance(audio_config, dict):
|
||||
audio_config.setdefault(
|
||||
"model_type", FunAudioChatAudioEncoderConfig.model_type
|
||||
)
|
||||
audio_config = FunAudioChatAudioEncoderConfig(**audio_config)
|
||||
elif audio_config is None:
|
||||
audio_config = FunAudioChatAudioEncoderConfig()
|
||||
self.audio_config = audio_config
|
||||
if audio_config is None:
|
||||
self.audio_config = FunAudioChatAudioEncoderConfig()
|
||||
elif isinstance(audio_config, dict):
|
||||
default_model_type = FunAudioChatAudioEncoderConfig.model_type
|
||||
audio_config.setdefault("model_type", default_model_type)
|
||||
self.audio_config = FunAudioChatAudioEncoderConfig(**audio_config)
|
||||
else:
|
||||
self.audio_config = audio_config
|
||||
|
||||
if isinstance(text_config, dict):
|
||||
if text_config is None:
|
||||
self.text_config = CONFIG_MAPPING["qwen2"]()
|
||||
elif isinstance(text_config, dict):
|
||||
# Default to qwen2 for backwards compatibility; FunAudioChat uses
|
||||
# qwen3 in practice for recent checkpoints.
|
||||
text_config.setdefault("model_type", "qwen2")
|
||||
import transformers
|
||||
|
||||
text_cls = transformers.CONFIG_MAPPING[text_config["model_type"]]
|
||||
text_config = text_cls(**text_config)
|
||||
elif text_config is None:
|
||||
import transformers
|
||||
|
||||
text_config = transformers.CONFIG_MAPPING["qwen2"]()
|
||||
self.text_config = text_config
|
||||
self.text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config)
|
||||
else:
|
||||
self.text_config = text_config
|
||||
|
||||
self.hidden_size = (
|
||||
int(self.text_config.hidden_size)
|
||||
|
||||
@@ -90,17 +90,19 @@ class KimiK25Config(PretrainedConfig):
|
||||
):
|
||||
# Vision config
|
||||
if vision_config is None:
|
||||
vision_config = KimiK25VisionConfig()
|
||||
self.vision_config = KimiK25VisionConfig()
|
||||
elif isinstance(vision_config, dict):
|
||||
vision_config = KimiK25VisionConfig(**vision_config)
|
||||
self.vision_config: KimiK25VisionConfig = vision_config
|
||||
self.vision_config = KimiK25VisionConfig(**vision_config)
|
||||
else:
|
||||
self.vision_config = vision_config
|
||||
|
||||
# Text config
|
||||
if text_config is None:
|
||||
text_config = DeepseekV3Config()
|
||||
self.text_config = DeepseekV3Config()
|
||||
elif isinstance(text_config, dict):
|
||||
text_config = DeepseekV3Config(**text_config)
|
||||
self.text_config: DeepseekV3Config = text_config
|
||||
self.text_config = DeepseekV3Config(**text_config)
|
||||
else:
|
||||
self.text_config = text_config
|
||||
|
||||
# Set mm_hidden_size to text hidden size if not explicitly set
|
||||
if self.vision_config.mm_hidden_size == self.vision_config.hidden_size:
|
||||
|
||||
@@ -412,6 +412,7 @@ class Ovis2_5Processor(ProcessorMixin):
|
||||
images = video
|
||||
else:
|
||||
raise ValueError("Either images or video should be provided.")
|
||||
assert images is not None
|
||||
min_pixels = min(
|
||||
max_pixels if max_pixels is not None else MAX_PIXELS,
|
||||
min_pixels if min_pixels is not None else MIN_PIXELS,
|
||||
|
||||
@@ -72,14 +72,12 @@ class BaseIncrementalDetokenizer(IncrementalDetokenizer, ABC):
|
||||
# Stop strings
|
||||
params = request.sampling_params
|
||||
assert params is not None
|
||||
stop_list: list[str]
|
||||
if params.stop is None:
|
||||
stop_list = []
|
||||
self.stop = []
|
||||
elif isinstance(params.stop, str):
|
||||
stop_list = [params.stop]
|
||||
self.stop = [params.stop]
|
||||
else:
|
||||
stop_list = params.stop
|
||||
self.stop = stop_list
|
||||
self.stop = params.stop
|
||||
self.min_tokens = params.min_tokens
|
||||
self.include_stop_str_in_output = params.include_stop_str_in_output
|
||||
|
||||
|
||||
@@ -282,8 +282,8 @@ class RayDistributedExecutor(Executor):
|
||||
# driver_dummy_worker can be None when using ray spmd worker.
|
||||
continue
|
||||
worker_node_and_gpu_ids.append(
|
||||
ray.get(worker.get_node_and_gpu_ids.remote())
|
||||
) # type: ignore[attr-defined]
|
||||
ray.get(worker.get_node_and_gpu_ids.remote()) # type: ignore[attr-defined]
|
||||
)
|
||||
|
||||
node_workers = defaultdict(list) # node id -> list of worker ranks
|
||||
node_gpus = defaultdict(list) # node id -> list of gpu ids
|
||||
|
||||
Reference in New Issue
Block a user