# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Tests for resource management in hf3fs_client.py: constructor failure cleanup
and idempotent close(). Tests use mock to replace real I/O operations
(hf3fs_fuse.io, SharedMemory, os, CUDA).
Requires hf3fs_fuse.io to be installed; skipped otherwise.
"""
|
|||
|
|
|
|||
|
|
from typing import Any
|
|||
|
|
from unittest.mock import MagicMock, patch
|
|||
|
|
|
|||
|
|
import pytest
|
|||
|
|
|
|||
|
|
# Assume the optional dependency is importable until an import below fails.
HF3FS_AVAILABLE = True
try:
    # These names are imported only to probe that hf3fs_fuse.io is usable;
    # the tests patch them on the client module rather than calling them.
    from hf3fs_fuse.io import (  # noqa: F401
        deregister_fd,
        extract_mount_point,
        make_ioring,
        make_iovec,
        register_fd,
    )

    from vllm.distributed.kv_transfer.kv_connector.v1.hf3fs.hf3fs_client import (
        Hf3fsClient,
    )
except Exception:
    # Any exception raised by either import marks the suite as unavailable,
    # so the tests are skipped instead of erroring at collection time.
    HF3FS_AVAILABLE = False

# Class/function decorator that skips tests when the imports above failed.
requires_hf3fs = pytest.mark.skipif(
    not HF3FS_AVAILABLE,
    reason="hf3fs_fuse.io is not available on this machine",
)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
# Helpers
|
|||
|
|
# ---------------------------------------------------------------------------
|
|||
|
|
|
|||
|
|
|
|||
|
|
class _FakeShm:
    """Shared-memory stub matching the multiprocessing.shared_memory.SharedMemory
    interface used by Hf3fsClient:

    Attributes accessed by the constructor:
        .buf  – memoryview / buffer-protocol object consumed by torch.frombuffer
    Methods called during normal lifetime:
        .unlink()  – called right after the iovec is set up
        .close()   – called in _release_resources()

    Bookkeeping attributes exposed for test assertions:
        .closed             – True once close() has been called at least once
        .close_call_count   – number of close() calls
        .unlink_call_count  – number of unlink() calls
    """

    def __init__(self, size: int = 1024) -> None:
        """Allocate a zero-filled in-process buffer of *size* bytes."""
        # Keep a reference to the backing bytearray so the memoryview in
        # ``buf`` stays valid for the lifetime of the stub.
        self._data = bytearray(size)
        self.buf = memoryview(self._data)
        self.closed = False
        self.close_call_count = 0
        self.unlink_call_count = 0

    def close(self) -> None:
        """Record a close() call; calling it repeatedly is allowed."""
        self.closed = True
        self.close_call_count += 1

    def unlink(self) -> None:
        """Record an unlink() call; nothing is actually removed."""
        self.unlink_call_count += 1
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ===========================================================================
|
|||
|
|
# TestHf3fsClientResourceManagement
|
|||
|
|
# ===========================================================================
|
|||
|
|
|
|||
|
|
|
|||
|
|
@requires_hf3fs
class TestHf3fsClientResourceManagement:
    """Tests for constructor failure cleanup and idempotent close()."""

    # Dotted path of the module under test; used to build patch targets.
    _MOD = "vllm.distributed.kv_transfer.kv_connector.v1.hf3fs.hf3fs_client"

    # ------------------------------------------------------------------
    # Helper: build a minimal Hf3fsClient bypassing all real I/O so that
    # we can fully control its internal state.
    # ------------------------------------------------------------------

    def _make_client(self, tmp_path):
        """Return a fully-mocked Hf3fsClient with controllable internals.

        Args:
            tmp_path: directory (pytest ``tmp_path`` fixture) used to build
                the file path handed to the constructor.

        Returns:
            A ``(client, fake_shm_r, fake_shm_w)`` tuple, where the two fakes
            are the ``_FakeShm`` objects installed on the client.
        """
        fake_shm_r = _FakeShm()
        fake_shm_w = _FakeShm()

        patchers: list[Any] = [
            patch(f"{self._MOD}.HF3FS_AVAILABLE", True),
            patch(f"{self._MOD}.register_fd"),
            patch(f"{self._MOD}.deregister_fd"),
            patch(f"{self._MOD}.extract_mount_point", return_value="/mnt/hf3fs"),
            patch(f"{self._MOD}.make_ioring", return_value=MagicMock()),
            patch(f"{self._MOD}.make_iovec", return_value=MagicMock()),
            patch(
                "multiprocessing.shared_memory.SharedMemory",
                side_effect=[fake_shm_r, fake_shm_w],
            ),
            patch("os.open", return_value=99),
            patch("os.ftruncate"),
            patch("os.close"),
            patch("os.fsync"),
            patch("torch.cuda.Stream", return_value=MagicMock()),
            patch("torch.frombuffer", return_value=MagicMock()),
            patch("torch.empty", return_value=MagicMock()),
        ]

        # Start the patchers inside the try block and remember which ones are
        # live, so that a failure in any start() call (or in the constructor)
        # still undoes every patch that was actually applied.
        started: list[Any] = []
        try:
            for patcher in patchers:
                patcher.start()
                started.append(patcher)

            client = Hf3fsClient(
                path=str(tmp_path / "test.bin"),
                size=1024,
                bytes_per_page=256,
                entries=4,
            )
        finally:
            # Unwind in reverse order, mirroring nested ``with`` blocks.
            for patcher in reversed(started):
                patcher.stop()

        # Manually point internal handles to our controllable fakes so that
        # assertions after close() can inspect them directly.
        client.shm_r = fake_shm_r
        client.shm_w = fake_shm_w
        client.file = 99
        return client, fake_shm_r, fake_shm_w

    # ------------------------------------------------------------------
    # close() idempotency
    # ------------------------------------------------------------------

    def test_close_idempotent_and_handles_cleared(self, tmp_path):
        """Multiple close() calls must not raise; deregister_fd called exactly
        once, all handles set to None, shm.close() invoked."""
        client, shm_r, shm_w = self._make_client(tmp_path)

        with (
            patch(f"{self._MOD}.deregister_fd") as mock_dereg,
            patch("os.close"),
        ):
            client.close()  # first close
            client.close()  # second close — must be no-op
            client.close()  # third close — must be no-op

        assert client._closed is True
        assert mock_dereg.call_count == 1, (
            f"deregister_fd called {mock_dereg.call_count} times; expected 1"
        )
        for attr in ("iov_r", "iov_w", "ior_r", "ior_w", "shm_r", "shm_w", "file"):
            assert getattr(client, attr) is None, f"{attr} should be None after close()"
        assert shm_r.closed is True
        assert shm_w.closed is True

    def test_flush_after_close_is_noop(self, tmp_path):
        """flush() after close() must silently do nothing (no fsync call)."""
        client, _, _ = self._make_client(tmp_path)

        with (
            patch(f"{self._MOD}.deregister_fd"),
            patch("os.close"),
            patch("os.fsync") as mock_fsync,
        ):
            client.close()
            client.flush()

        mock_fsync.assert_not_called()

    # ------------------------------------------------------------------
    # Constructor failure leaves no leaked resources
    # ------------------------------------------------------------------

    def test_constructor_failure_after_file_open_cleans_file(self, tmp_path):
        """If the constructor raises after os.open(), the fd must be closed."""
        with (
            patch(f"{self._MOD}.HF3FS_AVAILABLE", True),
            patch(f"{self._MOD}.register_fd"),
            patch(f"{self._MOD}.deregister_fd"),
            patch(
                f"{self._MOD}.extract_mount_point",
                side_effect=RuntimeError("mount point not found"),
            ),
            patch("os.open", return_value=55),
            patch("os.ftruncate"),
            patch("os.close") as mock_os_close,
            patch("torch.cuda.Stream", return_value=MagicMock()),
            pytest.raises(RuntimeError, match="mount point not found"),
        ):
            Hf3fsClient(
                path=str(tmp_path / "fail.bin"),
                size=1024,
                bytes_per_page=256,
                entries=4,
            )

        # 55 is the fake descriptor handed out by the patched os.open above.
        mock_os_close.assert_called_once_with(55)

    def test_constructor_failure_after_shm_alloc_closes_shm(self, tmp_path):
        """Constructor raises after SharedMemory creation → both shm objects closed."""
        fake_shm_r = _FakeShm()
        fake_shm_w = _FakeShm()

        with (
            patch(f"{self._MOD}.HF3FS_AVAILABLE", True),
            patch(f"{self._MOD}.register_fd"),
            patch(f"{self._MOD}.deregister_fd"),
            patch(f"{self._MOD}.extract_mount_point", return_value="/mnt/hf3fs"),
            patch(
                "multiprocessing.shared_memory.SharedMemory",
                side_effect=[fake_shm_r, fake_shm_w],
            ),
            patch("os.open", return_value=66),
            patch("os.ftruncate"),
            patch("os.close"),
            patch("torch.frombuffer", return_value=MagicMock()),
            patch("torch.empty", return_value=MagicMock()),
            patch(
                f"{self._MOD}.make_ioring",
                side_effect=RuntimeError("ioring init failed"),
            ),
            patch(f"{self._MOD}.make_iovec", return_value=MagicMock()),
            patch("torch.cuda.Stream", return_value=MagicMock()),
            pytest.raises(RuntimeError, match="ioring init failed"),
        ):
            Hf3fsClient(
                path=str(tmp_path / "fail2.bin"),
                size=1024,
                bytes_per_page=256,
                entries=4,
            )

        assert fake_shm_r.closed is True, (
            "shm_r was not closed after constructor failure"
        )
        assert fake_shm_w.closed is True, (
            "shm_w was not closed after constructor failure"
        )

    def test_constructor_failure_does_not_close_unallocated_shm(self, tmp_path):
        """Failure before SharedMemory is created must not raise AttributeError
        or TypeError from cleanup."""
        # pytest.raises only accepts the RuntimeError injected below, so any
        # other exception escaping from cleanup fails the test.
        with (
            patch(f"{self._MOD}.HF3FS_AVAILABLE", True),
            patch(f"{self._MOD}.register_fd"),
            patch(f"{self._MOD}.deregister_fd"),
            patch(
                f"{self._MOD}.extract_mount_point",
                side_effect=RuntimeError("early failure"),
            ),
            patch("os.open", return_value=77),
            patch("os.ftruncate"),
            patch("os.close"),
            patch("torch.cuda.Stream", return_value=MagicMock()),
            pytest.raises(RuntimeError, match="early failure"),
        ):
            Hf3fsClient(
                path=str(tmp_path / "early_fail.bin"),
                size=1024,
                bytes_per_page=256,
                entries=4,
            )

    # ------------------------------------------------------------------
    # _release_resources on already-cleared state must be a no-op
    # ------------------------------------------------------------------

    def test_release_resources_on_empty_state_is_safe(self, tmp_path):
        """_release_resources() on a fully-cleared client must not raise."""
        client, _, _ = self._make_client(tmp_path)

        with (
            patch(f"{self._MOD}.deregister_fd"),
            patch("os.close"),
        ):
            client.close()  # clears all handles

        # Fresh mocks so that only calls made by the second release attempt
        # are counted.
        with (
            patch(f"{self._MOD}.deregister_fd") as mock_dereg2,
            patch("os.close") as mock_os_close2,
        ):
            client._release_resources()  # must not raise

        mock_dereg2.assert_not_called()
        mock_os_close2.assert_not_called()
|