[V1] EngineCore supports profiling (#10564)
Signed-off-by: Abatom <abzhonghua@gmail.com>
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import multiprocessing
|
||||
import pickle
|
||||
import queue
|
||||
import threading
|
||||
import time
|
||||
@@ -16,7 +17,8 @@ from vllm.logger import init_logger
|
||||
from vllm.usage.usage_lib import UsageContext
|
||||
from vllm.v1.core.scheduler import Scheduler
|
||||
from vllm.v1.engine import (EngineCoreOutput, EngineCoreOutputs,
|
||||
EngineCoreRequest, EngineCoreRequestType)
|
||||
EngineCoreProfile, EngineCoreRequest,
|
||||
EngineCoreRequestType)
|
||||
from vllm.v1.engine.mm_input_mapper import MMInputMapper
|
||||
from vllm.v1.executor.gpu_executor import GPUExecutor
|
||||
from vllm.v1.request import Request, RequestStatus
|
||||
@@ -126,6 +128,9 @@ class EngineCore:
|
||||
scheduler_output, output)
|
||||
return engine_core_outputs
|
||||
|
||||
def profile(self, is_start=True):
|
||||
self.model_executor.worker.profile(is_start)
|
||||
|
||||
|
||||
class EngineCoreProc(EngineCore):
|
||||
"""ZMQ-wrapper for running EngineCore in background process."""
|
||||
@@ -312,11 +317,14 @@ class EngineCoreProc(EngineCore):
|
||||
self._last_logging_time = now
|
||||
|
||||
def _handle_client_request(
|
||||
self, request: Union[EngineCoreRequest, List[str]]) -> None:
|
||||
self, request: Union[EngineCoreRequest, EngineCoreProfile,
|
||||
List[str]]) -> None:
|
||||
"""Handle EngineCoreRequest or EngineCoreABORT from Client."""
|
||||
|
||||
if isinstance(request, EngineCoreRequest):
|
||||
self.add_request(request)
|
||||
elif isinstance(request, EngineCoreProfile):
|
||||
self.model_executor.worker.profile(request.is_start)
|
||||
else:
|
||||
# TODO: make an EngineCoreAbort wrapper
|
||||
assert isinstance(request, list)
|
||||
@@ -341,6 +349,8 @@ class EngineCoreProc(EngineCore):
|
||||
request = decoder_add_req.decode(request_data)
|
||||
elif request_type == EngineCoreRequestType.ABORT.value:
|
||||
request = decoder_abort_req.decode(request_data)
|
||||
elif request_type == EngineCoreRequestType.PROFILE.value:
|
||||
request = pickle.loads(request_data)
|
||||
else:
|
||||
raise ValueError(f"Unknown RequestType: {request_type}")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user