[Core] Profiler improvements and lazy initialization (#33198)
Signed-off-by: Jaewon Lee <jaewon@meta.com>
Co-authored-by: Lu Fang <30275821+houseroad@users.noreply.github.com>
This commit is contained in:
@@ -911,8 +911,8 @@ class AsyncLLM(EngineClient):
|
||||
if self.errored:
|
||||
raise self.dead_error
|
||||
|
||||
async def start_profile(self) -> None:
|
||||
coros = [self.engine_core.profile_async(True)]
|
||||
async def start_profile(self, profile_prefix: str | None = None) -> None:
|
||||
coros = [self.engine_core.profile_async(True, profile_prefix)]
|
||||
if self.profiler is not None:
|
||||
coros.append(asyncio.to_thread(self.profiler.start))
|
||||
await asyncio.gather(*coros)
|
||||
|
||||
@@ -568,8 +568,8 @@ class EngineCore:
|
||||
if self.scheduler:
|
||||
self.scheduler.shutdown()
|
||||
|
||||
def profile(self, is_start: bool = True):
|
||||
self.model_executor.profile(is_start)
|
||||
def profile(self, is_start: bool = True, profile_prefix: str | None = None):
|
||||
self.model_executor.profile(is_start, profile_prefix)
|
||||
|
||||
def reset_mm_cache(self):
|
||||
# NOTE: Since this is mainly for debugging, we don't attempt to
|
||||
|
||||
@@ -135,7 +135,7 @@ class EngineCoreClient(ABC):
|
||||
def add_request(self, request: EngineCoreRequest) -> None:
|
||||
raise NotImplementedError
|
||||
|
||||
def profile(self, is_start: bool = True) -> None:
|
||||
def profile(self, is_start: bool = True, profile_prefix: str | None = None) -> None:
|
||||
raise NotImplementedError
|
||||
|
||||
def reset_mm_cache(self) -> None:
|
||||
@@ -210,7 +210,9 @@ class EngineCoreClient(ABC):
|
||||
async def add_request_async(self, request: EngineCoreRequest) -> None:
|
||||
raise NotImplementedError
|
||||
|
||||
async def profile_async(self, is_start: bool = True) -> None:
|
||||
async def profile_async(
|
||||
self, is_start: bool = True, profile_prefix: str | None = None
|
||||
) -> None:
|
||||
raise NotImplementedError
|
||||
|
||||
async def reset_mm_cache_async(self) -> None:
|
||||
@@ -295,8 +297,8 @@ class InprocClient(EngineCoreClient):
|
||||
def shutdown(self) -> None:
|
||||
self.engine_core.shutdown()
|
||||
|
||||
def profile(self, is_start: bool = True) -> None:
|
||||
self.engine_core.profile(is_start)
|
||||
def profile(self, is_start: bool = True, profile_prefix: str | None = None) -> None:
|
||||
self.engine_core.profile(is_start, profile_prefix)
|
||||
|
||||
def reset_mm_cache(self) -> None:
|
||||
self.engine_core.reset_mm_cache()
|
||||
@@ -765,8 +767,8 @@ class SyncMPClient(MPClient):
|
||||
if request_ids and not self.resources.engine_dead:
|
||||
self._send_input(EngineCoreRequestType.ABORT, request_ids)
|
||||
|
||||
def profile(self, is_start: bool = True) -> None:
|
||||
self.call_utility("profile", is_start)
|
||||
def profile(self, is_start: bool = True, profile_prefix: str | None = None) -> None:
|
||||
self.call_utility("profile", is_start, profile_prefix)
|
||||
|
||||
def reset_mm_cache(self) -> None:
|
||||
self.call_utility("reset_mm_cache")
|
||||
@@ -987,8 +989,10 @@ class AsyncMPClient(MPClient):
|
||||
"""Resume the scheduler after a pause."""
|
||||
await self.call_utility_async("resume_scheduler")
|
||||
|
||||
async def profile_async(self, is_start: bool = True) -> None:
|
||||
await self.call_utility_async("profile", is_start)
|
||||
async def profile_async(
|
||||
self, is_start: bool = True, profile_prefix: str | None = None
|
||||
) -> None:
|
||||
await self.call_utility_async("profile", is_start, profile_prefix)
|
||||
|
||||
async def reset_mm_cache_async(self) -> None:
|
||||
await self.call_utility_async("reset_mm_cache")
|
||||
|
||||
@@ -326,8 +326,8 @@ class LLMEngine:
|
||||
|
||||
return processed_outputs.request_outputs
|
||||
|
||||
def start_profile(self):
|
||||
self.engine_core.profile(True)
|
||||
def start_profile(self, profile_prefix: str | None = None):
|
||||
self.engine_core.profile(True, profile_prefix)
|
||||
|
||||
def stop_profile(self):
|
||||
self.engine_core.profile(False)
|
||||
|
||||
Reference in New Issue
Block a user