Files
vllm/vllm/model_executor/offloader/__init__.py
Ming Yang 6831650c40 [offloader] v2: Hide weight onloading latency via prefetching (#29941)
Signed-off-by: Ming Yang <minos.future@gmail.com>
Signed-off-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
2026-02-25 17:20:59 -08:00

24 lines
602 B
Python

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Model parameter offloading infrastructure."""
from vllm.model_executor.offloader.base import (
BaseOffloader,
NoopOffloader,
create_offloader,
get_offloader,
set_offloader,
)
from vllm.model_executor.offloader.prefetch import PrefetchOffloader
from vllm.model_executor.offloader.uva import UVAOffloader
# Public names of this package: exactly the symbols a wildcard import
# (`from vllm.model_executor.offloader import *`) exposes. Every entry is
# imported at the top of this module.
__all__ = [
    # from .base
    "BaseOffloader",
    "NoopOffloader",
    # from .uva
    "UVAOffloader",
    # from .prefetch
    "PrefetchOffloader",
    # from .base
    "create_offloader",
    "get_offloader",
    "set_offloader",
]