[V1] Fully Transparent Implementation of CPU Offloading (#15354)

Signed-off-by: youkaichao <youkaichao@gmail.com>
This commit is contained in:
youkaichao
2025-03-31 20:22:34 +08:00
committed by GitHub
parent e7ae3bf3d6
commit 555aa21905
12 changed files with 148 additions and 25 deletions

View File

@@ -795,6 +795,14 @@ def is_pin_memory_available() -> bool:
return current_platform.is_pin_memory_available()
@cache
def is_uva_available() -> bool:
    """Report whether Unified Virtual Addressing (UVA) can be used.

    The answer is memoized by ``@cache``, so the underlying check runs at
    most once per process.

    Returns:
        The result of ``is_pin_memory_available()``; at present that is the
        only condition checked here.
    """
    # UVA is built on top of pinned (page-locked) host memory, so pinned
    # memory support is a prerequisite.
    # TODO: Add more requirements for UVA if needed.
    pinned_memory_supported = is_pin_memory_available()
    return pinned_memory_supported
class DeviceMemoryProfiler:
def __init__(self, device: Optional[torch.types.Device] = None):
@@ -1645,6 +1653,14 @@ def weak_ref_tensors(
raise ValueError("Invalid type for tensors")
def get_cuda_view_from_cpu_tensor(cpu_tensor: torch.Tensor) -> torch.Tensor:
    """
    Get a CUDA view of a CPU tensor using Unified Virtual Addressing (UVA).

    Args:
        cpu_tensor: The CPU tensor to expose; it must be pinned
            (``cpu_tensor.is_pinned()`` must be true).

    Returns:
        The tensor produced by the custom op
        ``torch.ops._C.get_cuda_view_from_cpu_tensor`` for ``cpu_tensor``.

    Raises:
        AssertionError: If ``cpu_tensor`` is not pinned.
    """
    # Raise explicitly rather than via an `assert` statement: asserts are
    # removed under `python -O`, which would let an unpinned tensor reach the
    # native op unchecked. AssertionError is kept so existing callers observe
    # the same exception type and message as before.
    if not cpu_tensor.is_pinned():
        raise AssertionError("CPU tensor must be pinned")
    return torch.ops._C.get_cuda_view_from_cpu_tensor(cpu_tensor)
def is_in_doc_build() -> bool:
try:
from sphinx.ext.autodoc.mock import _MockModule