[offloader] v2: Hide weight onloading latency via prefetching (#29941)
Signed-off-by: Ming Yang <minos.future@gmail.com>
Signed-off-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
This commit is contained in:
@@ -24,6 +24,12 @@ from vllm.config.model import (
|
||||
)
|
||||
from vllm.config.multimodal import MultiModalConfig
|
||||
from vllm.config.observability import ObservabilityConfig
|
||||
from vllm.config.offload import (
|
||||
OffloadBackend,
|
||||
OffloadConfig,
|
||||
PrefetchOffloadConfig,
|
||||
UVAOffloadConfig,
|
||||
)
|
||||
from vllm.config.parallel import EPLBConfig, ParallelConfig
|
||||
from vllm.config.pooler import PoolerConfig
|
||||
from vllm.config.profiler import ProfilerConfig
|
||||
@@ -85,6 +91,11 @@ __all__ = [
|
||||
"MultiModalConfig",
|
||||
# From vllm.config.observability
|
||||
"ObservabilityConfig",
|
||||
# From vllm.config.offload
|
||||
"OffloadBackend",
|
||||
"OffloadConfig",
|
||||
"PrefetchOffloadConfig",
|
||||
"UVAOffloadConfig",
|
||||
# From vllm.config.parallel
|
||||
"EPLBConfig",
|
||||
"ParallelConfig",
|
||||
|
||||
Reference in New Issue
Block a user