diff --git a/tools/pre_commit/mypy.py b/tools/pre_commit/mypy.py index b2f70f184..7d4b37305 100755 --- a/tools/pre_commit/mypy.py +++ b/tools/pre_commit/mypy.py @@ -41,7 +41,6 @@ EXCLUDE = [ # TODO: Remove these entries after fixing mypy errors. "vllm/benchmarks", "vllm/config", - "vllm/device_allocator", "vllm/reasoning", "vllm/tool_parser", ] diff --git a/vllm/device_allocator/cumem.py b/vllm/device_allocator/cumem.py index 2f97288b6..554a34b6a 100644 --- a/vllm/device_allocator/cumem.py +++ b/vllm/device_allocator/cumem.py @@ -11,7 +11,7 @@ import dataclasses import gc import os -from collections.abc import Callable +from collections.abc import Callable, Iterator from contextlib import contextmanager from typing import Any @@ -25,6 +25,7 @@ logger = init_logger(__name__) cumem_available = False +libcudart: Any = None try: from vllm.cumem_allocator import ( init_module, @@ -41,9 +42,7 @@ except ModuleNotFoundError: init_module = None python_create_and_map = None python_unmap_and_release = None - CudaRTLibrary = None lib_name = None - libcudart = None # py_device, py_alignedSize, py_d_mem, py_p_memHandle HandleType = tuple[int, int, int, int] @@ -65,7 +64,8 @@ def unmap_and_release(allocation_handle: HandleType) -> None: def get_pluggable_allocator( - python_malloc_fn: Callable[[int], int], python_free_func: Callable[[int, int], None] + python_malloc_fn: Callable[[HandleType], None], + python_free_func: Callable[[int], HandleType], ) -> torch.cuda.memory.CUDAPluggableAllocator: init_module(python_malloc_fn, python_free_func) new_alloc = torch.cuda.memory.CUDAPluggableAllocator( @@ -76,8 +76,11 @@ def get_pluggable_allocator( @contextmanager def use_memory_pool_with_allocator( - python_malloc_fn: Callable[[int], int], python_free_func: Callable[[int, int], None] -) -> None: + python_malloc_fn: Callable[[HandleType], None], + python_free_func: Callable[[int], HandleType], +) -> Iterator[ + tuple[torch.cuda.memory.MemPool, torch.cuda.memory.CUDAPluggableAllocator] +]: new_alloc = get_pluggable_allocator(python_malloc_fn, python_free_func) mem_pool = torch.cuda.memory.MemPool(new_alloc._allocator) with torch.cuda.memory.use_mem_pool(mem_pool): @@ -109,7 +112,7 @@ class CuMemAllocator: not work as expected. """ - instance: "CuMemAllocator" = None + instance: "CuMemAllocator | None" = None default_tag: str = "default" @staticmethod