[V1] EngineCore supports profiling (#10564)

Signed-off-by: Abatom <abzhonghua@gmail.com>
This commit is contained in:
Zhonghua Deng
2024-11-23 09:16:15 +08:00
committed by GitHub
parent 28598f3939
commit d345f409b7
5 changed files with 68 additions and 9 deletions

View File

@@ -1,4 +1,5 @@
import multiprocessing
import pickle
import queue
import threading
import time
@@ -16,7 +17,8 @@ from vllm.logger import init_logger
from vllm.usage.usage_lib import UsageContext
from vllm.v1.core.scheduler import Scheduler
from vllm.v1.engine import (EngineCoreOutput, EngineCoreOutputs,
EngineCoreRequest, EngineCoreRequestType)
EngineCoreProfile, EngineCoreRequest,
EngineCoreRequestType)
from vllm.v1.engine.mm_input_mapper import MMInputMapper
from vllm.v1.executor.gpu_executor import GPUExecutor
from vllm.v1.request import Request, RequestStatus
@@ -126,6 +128,9 @@ class EngineCore:
scheduler_output, output)
return engine_core_outputs
def profile(self, is_start: bool = True) -> None:
    """Forward a profiling toggle to the model executor's worker.

    Thin pass-through: calls ``self.model_executor.worker.profile`` with
    ``is_start`` unchanged and returns nothing.

    Args:
        is_start: Flag handed to the worker's ``profile`` method.
            Presumably ``True`` starts profiling and ``False`` stops it
            -- TODO confirm against the worker implementation.
    """
    self.model_executor.worker.profile(is_start)
class EngineCoreProc(EngineCore):
"""ZMQ-wrapper for running EngineCore in background process."""
@@ -312,11 +317,14 @@ class EngineCoreProc(EngineCore):
self._last_logging_time = now
def _handle_client_request(
self, request: Union[EngineCoreRequest, List[str]]) -> None:
self, request: Union[EngineCoreRequest, EngineCoreProfile,
List[str]]) -> None:
"""Handle EngineCoreRequest or EngineCoreABORT from Client."""
if isinstance(request, EngineCoreRequest):
self.add_request(request)
elif isinstance(request, EngineCoreProfile):
self.model_executor.worker.profile(request.is_start)
else:
# TODO: make an EngineCoreAbort wrapper
assert isinstance(request, list)
@@ -341,6 +349,8 @@ class EngineCoreProc(EngineCore):
request = decoder_add_req.decode(request_data)
elif request_type == EngineCoreRequestType.ABORT.value:
request = decoder_abort_req.decode(request_data)
elif request_type == EngineCoreRequestType.PROFILE.value:
request = pickle.loads(request_data)
else:
raise ValueError(f"Unknown RequestType: {request_type}")