[Metrics] Log multi-modal cache stats and fix reset (#26285)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -33,8 +33,6 @@ from vllm.distributed.parallel_state import (
get_tp_group,
)
from vllm.logger import init_logger
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.cache import worker_receiver_cache_from_config
from vllm.utils import (
_maybe_force_spawn,
decorate_logs,
@@ -46,7 +44,6 @@ from vllm.utils import (
)
from vllm.v1.core.sched.output import SchedulerOutput
from vllm.v1.executor.abstract import Executor, FailureCallback
from vllm.v1.executor.utils import get_and_update_mm_cache
from vllm.v1.outputs import AsyncModelRunnerOutput, DraftTokenIds, ModelRunnerOutput
from vllm.v1.worker.worker_base import WorkerWrapperBase

@@ -422,6 +419,7 @@ class WorkerProc:
"rank": rank,
"distributed_init_method": distributed_init_method,
"is_driver_worker": is_driver_worker,
"shared_worker_lock": shared_worker_lock,
}
wrapper.init_worker(all_kwargs)
self.worker = wrapper
@@ -445,11 +443,6 @@ class WorkerProc:
)
self.async_output_copy_thread.start()

# Initialize multimodal receiver cache if needed
self.mm_receiver_cache = worker_receiver_cache_from_config(
vllm_config, MULTIMODAL_REGISTRY, shared_worker_lock
)

# Initialize device
self.worker.init_device()

@@ -692,12 +685,7 @@ class WorkerProc:
func = getattr(self.worker, method)
elif isinstance(method, bytes):
func = partial(cloudpickle.loads(method), self.worker)
# retrieve from shm cache if available
if (
self.mm_receiver_cache is not None
and func.__name__ == "execute_model"
):
get_and_update_mm_cache(self.mm_receiver_cache, args)

output = func(*args, **kwargs)
except Exception as e:
# Notes have been introduced in python 3.11
Reference in New Issue
Block a user