[CI] Bump mypy version to 1.19.1 (#36104)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -55,7 +55,7 @@ repos:
|
|||||||
language: python
|
language: python
|
||||||
types_or: [python, pyi]
|
types_or: [python, pyi]
|
||||||
require_serial: true
|
require_serial: true
|
||||||
additional_dependencies: ["mypy[faster-cache]==1.15.0", regex, types-cachetools, types-setuptools, types-PyYAML, types-requests, types-torch, pydantic]
|
additional_dependencies: ["mypy[faster-cache]==1.19.1", regex, types-cachetools, types-setuptools, types-PyYAML, types-requests, types-torch, pydantic]
|
||||||
- id: mypy-3.10 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
|
- id: mypy-3.10 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
|
||||||
name: Run mypy for Python 3.10
|
name: Run mypy for Python 3.10
|
||||||
entry: python tools/pre_commit/mypy.py 1 "3.10"
|
entry: python tools/pre_commit/mypy.py 1 "3.10"
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ Run `pytest tests/quantization/test_mixed_precision.py`.
|
|||||||
|
|
||||||
import importlib
|
import importlib
|
||||||
import importlib.metadata
|
import importlib.metadata
|
||||||
|
import importlib.util
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
import lm_eval
|
import lm_eval
|
||||||
|
|||||||
@@ -274,6 +274,7 @@ class ShmRingBuffer:
|
|||||||
self.shared_memory = shared_memory.SharedMemory(
|
self.shared_memory = shared_memory.SharedMemory(
|
||||||
create=True, size=self.total_bytes_of_buffer
|
create=True, size=self.total_bytes_of_buffer
|
||||||
)
|
)
|
||||||
|
assert self.shared_memory.buf is not None, "Buffer was not created"
|
||||||
# initialize the metadata section to 0
|
# initialize the metadata section to 0
|
||||||
with self.shared_memory.buf[self.metadata_offset :] as metadata_buffer:
|
with self.shared_memory.buf[self.metadata_offset :] as metadata_buffer:
|
||||||
torch.frombuffer(metadata_buffer, dtype=torch.uint8).fill_(0)
|
torch.frombuffer(metadata_buffer, dtype=torch.uint8).fill_(0)
|
||||||
@@ -325,6 +326,7 @@ class ShmRingBuffer:
|
|||||||
def get_data(self, current_idx: int):
|
def get_data(self, current_idx: int):
|
||||||
start = self.data_offset + current_idx * self.max_chunk_bytes
|
start = self.data_offset + current_idx * self.max_chunk_bytes
|
||||||
end = start + self.max_chunk_bytes
|
end = start + self.max_chunk_bytes
|
||||||
|
assert self.shared_memory.buf is not None, "Buffer has been closed"
|
||||||
with self.shared_memory.buf[start:end] as buf:
|
with self.shared_memory.buf[start:end] as buf:
|
||||||
yield buf
|
yield buf
|
||||||
|
|
||||||
@@ -332,6 +334,7 @@ class ShmRingBuffer:
|
|||||||
def get_metadata(self, current_idx: int):
|
def get_metadata(self, current_idx: int):
|
||||||
start = self.metadata_offset + current_idx * self.metadata_size
|
start = self.metadata_offset + current_idx * self.metadata_size
|
||||||
end = start + self.metadata_size
|
end = start + self.metadata_size
|
||||||
|
assert self.shared_memory.buf is not None, "Buffer has been closed"
|
||||||
with self.shared_memory.buf[start:end] as buf:
|
with self.shared_memory.buf[start:end] as buf:
|
||||||
yield buf
|
yield buf
|
||||||
|
|
||||||
|
|||||||
@@ -197,6 +197,7 @@ class SingleWriterShmRingBuffer:
|
|||||||
"""
|
"""
|
||||||
assert self.is_writer, "Only the writer can allocate buffers."
|
assert self.is_writer, "Only the writer can allocate buffers."
|
||||||
assert size > 0, "Size must be greater than 0"
|
assert size > 0, "Size must be greater than 0"
|
||||||
|
assert self.shared_memory.buf is not None, "Buffer has been closed"
|
||||||
size += self.MD_SIZE # add metadata size to the buffer size
|
size += self.MD_SIZE # add metadata size to the buffer size
|
||||||
# reset to beginning if the buffer does not have enough contiguous space
|
# reset to beginning if the buffer does not have enough contiguous space
|
||||||
buffer_end_reset = self.data_buffer_end % self.data_buffer_size
|
buffer_end_reset = self.data_buffer_end % self.data_buffer_size
|
||||||
@@ -239,6 +240,7 @@ class SingleWriterShmRingBuffer:
|
|||||||
|
|
||||||
@contextmanager
|
@contextmanager
|
||||||
def access_buf(self, address: int):
|
def access_buf(self, address: int):
|
||||||
|
assert self.shared_memory.buf is not None, "Buffer has been closed"
|
||||||
buf_idx = address % self.data_buffer_size
|
buf_idx = address % self.data_buffer_size
|
||||||
|
|
||||||
# read metadata
|
# read metadata
|
||||||
|
|||||||
@@ -351,6 +351,7 @@ class TpKVTopology:
|
|||||||
include_num_layers_dimension=self._cross_layers_blocks
|
include_num_layers_dimension=self._cross_layers_blocks
|
||||||
)
|
)
|
||||||
except (AttributeError, NotImplementedError):
|
except (AttributeError, NotImplementedError):
|
||||||
|
assert self.tensor_shape is not None
|
||||||
kv_cache_stride_order = tuple(range(len(self.tensor_shape)))
|
kv_cache_stride_order = tuple(range(len(self.tensor_shape)))
|
||||||
|
|
||||||
# In case of cross layers permute kv_cache_shape according to
|
# In case of cross layers permute kv_cache_shape according to
|
||||||
|
|||||||
@@ -1964,6 +1964,7 @@ def in_the_same_node_as(
|
|||||||
if rank == source_rank:
|
if rank == source_rank:
|
||||||
# create a shared memory segment
|
# create a shared memory segment
|
||||||
shm = shared_memory.SharedMemory(create=True, size=128)
|
shm = shared_memory.SharedMemory(create=True, size=128)
|
||||||
|
assert shm.buf is not None, "Buffer was not created"
|
||||||
shm.buf[: len(magic_message)] = magic_message
|
shm.buf[: len(magic_message)] = magic_message
|
||||||
if isinstance(pg, ProcessGroup):
|
if isinstance(pg, ProcessGroup):
|
||||||
torch.distributed.broadcast_object_list(
|
torch.distributed.broadcast_object_list(
|
||||||
@@ -1990,6 +1991,7 @@ def in_the_same_node_as(
|
|||||||
lambda *args, **kwargs: None,
|
lambda *args, **kwargs: None,
|
||||||
):
|
):
|
||||||
shm = shared_memory.SharedMemory(name=name)
|
shm = shared_memory.SharedMemory(name=name)
|
||||||
|
assert shm.buf is not None, "Buffer was not opened"
|
||||||
if shm.buf[: len(magic_message)] == magic_message:
|
if shm.buf[: len(magic_message)] == magic_message:
|
||||||
is_in_the_same_node[rank] = 1
|
is_in_the_same_node[rank] = 1
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
|
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING, overload
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
@@ -14,12 +14,24 @@ if TYPE_CHECKING:
|
|||||||
|
|
||||||
|
|
||||||
class BaseLayerWithLoRA(nn.Module):
|
class BaseLayerWithLoRA(nn.Module):
|
||||||
|
@overload
|
||||||
|
def slice_lora_a(
|
||||||
|
self, lora_a: list[torch.Tensor | None]
|
||||||
|
) -> list[torch.Tensor | None]: ...
|
||||||
|
@overload
|
||||||
|
def slice_lora_a(self, lora_a: torch.Tensor) -> torch.Tensor: ...
|
||||||
def slice_lora_a(
|
def slice_lora_a(
|
||||||
self, lora_a: torch.Tensor | list[torch.Tensor | None]
|
self, lora_a: torch.Tensor | list[torch.Tensor | None]
|
||||||
) -> torch.Tensor | list[torch.Tensor | None]:
|
) -> torch.Tensor | list[torch.Tensor | None]:
|
||||||
"""Slice lora a if splitting for tensor parallelism."""
|
"""Slice lora a if splitting for tensor parallelism."""
|
||||||
...
|
...
|
||||||
|
|
||||||
|
@overload
|
||||||
|
def slice_lora_b(
|
||||||
|
self, lora_b: list[torch.Tensor | None]
|
||||||
|
) -> list[torch.Tensor | None]: ...
|
||||||
|
@overload
|
||||||
|
def slice_lora_b(self, lora_b: torch.Tensor) -> torch.Tensor: ...
|
||||||
def slice_lora_b(
|
def slice_lora_b(
|
||||||
self, lora_b: torch.Tensor | list[torch.Tensor | None]
|
self, lora_b: torch.Tensor | list[torch.Tensor | None]
|
||||||
) -> torch.Tensor | list[torch.Tensor | None]:
|
) -> torch.Tensor | list[torch.Tensor | None]:
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ import itertools
|
|||||||
from collections import defaultdict, deque
|
from collections import defaultdict, deque
|
||||||
from collections.abc import Set
|
from collections.abc import Set
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
from typing import TYPE_CHECKING, Any, cast
|
from typing import TYPE_CHECKING, Any, Literal, cast, overload
|
||||||
|
|
||||||
import jinja2
|
import jinja2
|
||||||
import jinja2.ext
|
import jinja2.ext
|
||||||
@@ -439,6 +439,28 @@ def resolve_chat_template_kwargs(
|
|||||||
return {k: v for k, v in chat_template_kwargs.items() if k in accept_vars}
|
return {k: v for k, v in chat_template_kwargs.items() if k in accept_vars}
|
||||||
|
|
||||||
|
|
||||||
|
@overload
|
||||||
|
def safe_apply_chat_template(
|
||||||
|
model_config: "ModelConfig",
|
||||||
|
tokenizer: HfTokenizer,
|
||||||
|
conversation: list[ConversationMessage],
|
||||||
|
*,
|
||||||
|
tools: list[dict[str, Any]] | None = ...,
|
||||||
|
chat_template: str | None = ...,
|
||||||
|
tokenize: Literal[True] = ...,
|
||||||
|
**kwargs,
|
||||||
|
) -> list[int]: ...
|
||||||
|
@overload
|
||||||
|
def safe_apply_chat_template(
|
||||||
|
model_config: "ModelConfig",
|
||||||
|
tokenizer: HfTokenizer,
|
||||||
|
conversation: list[ConversationMessage],
|
||||||
|
*,
|
||||||
|
tools: list[dict[str, Any]] | None = ...,
|
||||||
|
chat_template: str | None = ...,
|
||||||
|
tokenize: Literal[False] = ...,
|
||||||
|
**kwargs,
|
||||||
|
) -> str: ...
|
||||||
def safe_apply_chat_template(
|
def safe_apply_chat_template(
|
||||||
model_config: "ModelConfig",
|
model_config: "ModelConfig",
|
||||||
tokenizer: HfTokenizer,
|
tokenizer: HfTokenizer,
|
||||||
|
|||||||
@@ -533,6 +533,7 @@ class SamplingParams(
|
|||||||
if eos_ids:
|
if eos_ids:
|
||||||
self._all_stop_token_ids.update(eos_ids)
|
self._all_stop_token_ids.update(eos_ids)
|
||||||
if not self.ignore_eos:
|
if not self.ignore_eos:
|
||||||
|
assert self.stop_token_ids is not None
|
||||||
eos_ids.update(self.stop_token_ids)
|
eos_ids.update(self.stop_token_ids)
|
||||||
self.stop_token_ids = list(eos_ids)
|
self.stop_token_ids = list(eos_ids)
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from transformers import PretrainedConfig
|
from transformers import CONFIG_MAPPING, PretrainedConfig
|
||||||
|
|
||||||
# NOTE: Temporary shim for FunAudioChat checkpoints.
|
# NOTE: Temporary shim for FunAudioChat checkpoints.
|
||||||
# These checkpoints use `model_type="funaudiochat"`, which is not currently
|
# These checkpoints use `model_type="funaudiochat"`, which is not currently
|
||||||
@@ -92,28 +92,24 @@ class FunAudioChatConfig(PretrainedConfig):
|
|||||||
self.audio_token_index = audio_token_index
|
self.audio_token_index = audio_token_index
|
||||||
self.ignore_index = ignore_index
|
self.ignore_index = ignore_index
|
||||||
|
|
||||||
if isinstance(audio_config, dict):
|
if audio_config is None:
|
||||||
audio_config.setdefault(
|
self.audio_config = FunAudioChatAudioEncoderConfig()
|
||||||
"model_type", FunAudioChatAudioEncoderConfig.model_type
|
elif isinstance(audio_config, dict):
|
||||||
)
|
default_model_type = FunAudioChatAudioEncoderConfig.model_type
|
||||||
audio_config = FunAudioChatAudioEncoderConfig(**audio_config)
|
audio_config.setdefault("model_type", default_model_type)
|
||||||
elif audio_config is None:
|
self.audio_config = FunAudioChatAudioEncoderConfig(**audio_config)
|
||||||
audio_config = FunAudioChatAudioEncoderConfig()
|
else:
|
||||||
self.audio_config = audio_config
|
self.audio_config = audio_config
|
||||||
|
|
||||||
if isinstance(text_config, dict):
|
if text_config is None:
|
||||||
|
self.text_config = CONFIG_MAPPING["qwen2"]()
|
||||||
|
elif isinstance(text_config, dict):
|
||||||
# Default to qwen2 for backwards compatibility; FunAudioChat uses
|
# Default to qwen2 for backwards compatibility; FunAudioChat uses
|
||||||
# qwen3 in practice for recent checkpoints.
|
# qwen3 in practice for recent checkpoints.
|
||||||
text_config.setdefault("model_type", "qwen2")
|
text_config.setdefault("model_type", "qwen2")
|
||||||
import transformers
|
self.text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config)
|
||||||
|
else:
|
||||||
text_cls = transformers.CONFIG_MAPPING[text_config["model_type"]]
|
self.text_config = text_config
|
||||||
text_config = text_cls(**text_config)
|
|
||||||
elif text_config is None:
|
|
||||||
import transformers
|
|
||||||
|
|
||||||
text_config = transformers.CONFIG_MAPPING["qwen2"]()
|
|
||||||
self.text_config = text_config
|
|
||||||
|
|
||||||
self.hidden_size = (
|
self.hidden_size = (
|
||||||
int(self.text_config.hidden_size)
|
int(self.text_config.hidden_size)
|
||||||
|
|||||||
@@ -90,17 +90,19 @@ class KimiK25Config(PretrainedConfig):
|
|||||||
):
|
):
|
||||||
# Vision config
|
# Vision config
|
||||||
if vision_config is None:
|
if vision_config is None:
|
||||||
vision_config = KimiK25VisionConfig()
|
self.vision_config = KimiK25VisionConfig()
|
||||||
elif isinstance(vision_config, dict):
|
elif isinstance(vision_config, dict):
|
||||||
vision_config = KimiK25VisionConfig(**vision_config)
|
self.vision_config = KimiK25VisionConfig(**vision_config)
|
||||||
self.vision_config: KimiK25VisionConfig = vision_config
|
else:
|
||||||
|
self.vision_config = vision_config
|
||||||
|
|
||||||
# Text config
|
# Text config
|
||||||
if text_config is None:
|
if text_config is None:
|
||||||
text_config = DeepseekV3Config()
|
self.text_config = DeepseekV3Config()
|
||||||
elif isinstance(text_config, dict):
|
elif isinstance(text_config, dict):
|
||||||
text_config = DeepseekV3Config(**text_config)
|
self.text_config = DeepseekV3Config(**text_config)
|
||||||
self.text_config: DeepseekV3Config = text_config
|
else:
|
||||||
|
self.text_config = text_config
|
||||||
|
|
||||||
# Set mm_hidden_size to text hidden size if not explicitly set
|
# Set mm_hidden_size to text hidden size if not explicitly set
|
||||||
if self.vision_config.mm_hidden_size == self.vision_config.hidden_size:
|
if self.vision_config.mm_hidden_size == self.vision_config.hidden_size:
|
||||||
|
|||||||
@@ -412,6 +412,7 @@ class Ovis2_5Processor(ProcessorMixin):
|
|||||||
images = video
|
images = video
|
||||||
else:
|
else:
|
||||||
raise ValueError("Either images or video should be provided.")
|
raise ValueError("Either images or video should be provided.")
|
||||||
|
assert images is not None
|
||||||
min_pixels = min(
|
min_pixels = min(
|
||||||
max_pixels if max_pixels is not None else MAX_PIXELS,
|
max_pixels if max_pixels is not None else MAX_PIXELS,
|
||||||
min_pixels if min_pixels is not None else MIN_PIXELS,
|
min_pixels if min_pixels is not None else MIN_PIXELS,
|
||||||
|
|||||||
@@ -72,14 +72,12 @@ class BaseIncrementalDetokenizer(IncrementalDetokenizer, ABC):
|
|||||||
# Stop strings
|
# Stop strings
|
||||||
params = request.sampling_params
|
params = request.sampling_params
|
||||||
assert params is not None
|
assert params is not None
|
||||||
stop_list: list[str]
|
|
||||||
if params.stop is None:
|
if params.stop is None:
|
||||||
stop_list = []
|
self.stop = []
|
||||||
elif isinstance(params.stop, str):
|
elif isinstance(params.stop, str):
|
||||||
stop_list = [params.stop]
|
self.stop = [params.stop]
|
||||||
else:
|
else:
|
||||||
stop_list = params.stop
|
self.stop = params.stop
|
||||||
self.stop = stop_list
|
|
||||||
self.min_tokens = params.min_tokens
|
self.min_tokens = params.min_tokens
|
||||||
self.include_stop_str_in_output = params.include_stop_str_in_output
|
self.include_stop_str_in_output = params.include_stop_str_in_output
|
||||||
|
|
||||||
|
|||||||
@@ -282,8 +282,8 @@ class RayDistributedExecutor(Executor):
|
|||||||
# driver_dummy_worker can be None when using ray spmd worker.
|
# driver_dummy_worker can be None when using ray spmd worker.
|
||||||
continue
|
continue
|
||||||
worker_node_and_gpu_ids.append(
|
worker_node_and_gpu_ids.append(
|
||||||
ray.get(worker.get_node_and_gpu_ids.remote())
|
ray.get(worker.get_node_and_gpu_ids.remote()) # type: ignore[attr-defined]
|
||||||
) # type: ignore[attr-defined]
|
)
|
||||||
|
|
||||||
node_workers = defaultdict(list) # node id -> list of worker ranks
|
node_workers = defaultdict(list) # node id -> list of worker ranks
|
||||||
node_gpus = defaultdict(list) # node id -> list of gpu ids
|
node_gpus = defaultdict(list) # node id -> list of gpu ids
|
||||||
|
|||||||
Reference in New Issue
Block a user