[Core] Shared memory based object store for Multimodal data caching and IPC (#20452)
Signed-off-by: donglu <donglu@cohere.com>
This commit is contained in:
@@ -14,6 +14,7 @@ from enum import Enum, auto
|
||||
from functools import partial
|
||||
from multiprocessing.connection import Connection
|
||||
from multiprocessing.process import BaseProcess
|
||||
from multiprocessing.synchronize import Lock as LockType
|
||||
from threading import Thread
|
||||
from typing import Any, Callable, Optional, Union, cast
|
||||
|
||||
@@ -31,10 +32,13 @@ from vllm.distributed.parallel_state import (get_dp_group, get_ep_group,
|
||||
from vllm.executor.multiproc_worker_utils import (
|
||||
set_multiprocessing_worker_envs)
|
||||
from vllm.logger import init_logger
|
||||
from vllm.multimodal import MULTIMODAL_REGISTRY
|
||||
from vllm.multimodal.cache import worker_receiver_cache_from_config
|
||||
from vllm.utils import (decorate_logs, get_distributed_init_method,
|
||||
get_loopback_ip, get_mp_context, get_open_port,
|
||||
set_process_title)
|
||||
from vllm.v1.executor.abstract import Executor, FailureCallback
|
||||
from vllm.v1.executor.utils import get_and_update_mm_cache
|
||||
from vllm.v1.outputs import (AsyncModelRunnerOutput, DraftTokenIds,
|
||||
ModelRunnerOutput)
|
||||
from vllm.worker.worker_base import WorkerWrapperBase
|
||||
@@ -81,6 +85,8 @@ class MultiprocExecutor(Executor):
|
||||
scheduler_output_handle = self.rpc_broadcast_mq.export_handle()
|
||||
|
||||
# Create workers
|
||||
context = get_mp_context()
|
||||
shared_worker_lock = context.Lock()
|
||||
unready_workers: list[UnreadyWorkerProcHandle] = []
|
||||
success = False
|
||||
try:
|
||||
@@ -92,6 +98,7 @@ class MultiprocExecutor(Executor):
|
||||
rank=rank,
|
||||
distributed_init_method=distributed_init_method,
|
||||
input_shm_handle=scheduler_output_handle,
|
||||
shared_worker_lock=shared_worker_lock,
|
||||
))
|
||||
|
||||
# Workers must be created before wait_for_ready to avoid
|
||||
@@ -380,6 +387,7 @@ class WorkerProc:
|
||||
rank: int,
|
||||
distributed_init_method: str,
|
||||
input_shm_handle: Handle,
|
||||
shared_worker_lock: LockType,
|
||||
):
|
||||
self.rank = rank
|
||||
wrapper = WorkerWrapperBase(vllm_config=vllm_config, rpc_rank=rank)
|
||||
@@ -416,6 +424,10 @@ class WorkerProc:
|
||||
name="WorkerAsyncOutputCopy")
|
||||
self.async_output_copy_thread.start()
|
||||
|
||||
# Initialize multimodal receiver cache if needed
|
||||
self.mm_receiver_cache = worker_receiver_cache_from_config(
|
||||
vllm_config, MULTIMODAL_REGISTRY, shared_worker_lock)
|
||||
|
||||
# Initialize device
|
||||
self.worker.init_device()
|
||||
|
||||
@@ -428,11 +440,12 @@ class WorkerProc:
|
||||
|
||||
@staticmethod
|
||||
def make_worker_process(
|
||||
vllm_config: VllmConfig,
|
||||
local_rank: int,
|
||||
rank: int,
|
||||
distributed_init_method: str,
|
||||
input_shm_handle, # Receive SchedulerOutput
|
||||
vllm_config: VllmConfig,
|
||||
local_rank: int,
|
||||
rank: int,
|
||||
distributed_init_method: str,
|
||||
input_shm_handle, # Receive SchedulerOutput
|
||||
shared_worker_lock: LockType,
|
||||
) -> UnreadyWorkerProcHandle:
|
||||
context = get_mp_context()
|
||||
# (reader, writer)
|
||||
@@ -449,6 +462,7 @@ class WorkerProc:
|
||||
"input_shm_handle": input_shm_handle,
|
||||
"ready_pipe": (reader, writer),
|
||||
"death_pipe": death_reader,
|
||||
"shared_worker_lock": shared_worker_lock,
|
||||
}
|
||||
# Run EngineCore busy loop in background process.
|
||||
proc = context.Process(target=WorkerProc.worker_main,
|
||||
@@ -646,6 +660,10 @@ class WorkerProc:
|
||||
func = getattr(self.worker, method)
|
||||
elif isinstance(method, bytes):
|
||||
func = partial(cloudpickle.loads(method), self.worker)
|
||||
# retrieve from shm cache if available
|
||||
if self.mm_receiver_cache is not None \
|
||||
and func.__name__ == "execute_model":
|
||||
get_and_update_mm_cache(self.mm_receiver_cache, args)
|
||||
output = func(*args, **kwargs)
|
||||
except Exception as e:
|
||||
# Notes have been introduced in python 3.11
|
||||
|
||||
Reference in New Issue
Block a user