[offloader] v2: Hide weight onloading latency via prefetching (#29941)

Signed-off-by: Ming Yang <minos.future@gmail.com>
Signed-off-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
This commit is contained in:
Ming Yang
2026-02-25 17:20:59 -08:00
committed by GitHub
parent ed42507f6d
commit 6831650c40
20 changed files with 1550 additions and 131 deletions

View File

@@ -24,6 +24,12 @@ from vllm.config.model import (
)
from vllm.config.multimodal import MultiModalConfig
from vllm.config.observability import ObservabilityConfig
from vllm.config.offload import (
OffloadBackend,
OffloadConfig,
PrefetchOffloadConfig,
UVAOffloadConfig,
)
from vllm.config.parallel import EPLBConfig, ParallelConfig
from vllm.config.pooler import PoolerConfig
from vllm.config.profiler import ProfilerConfig
@@ -85,6 +91,11 @@ __all__ = [
"MultiModalConfig",
# From vllm.config.observability
"ObservabilityConfig",
# From vllm.config.offload
"OffloadBackend",
"OffloadConfig",
"PrefetchOffloadConfig",
"UVAOffloadConfig",
# From vllm.config.parallel
"EPLBConfig",
"ParallelConfig",