[Core] Profiler improvements and lazy initialization (#33198)

Signed-off-by: Jaewon Lee <jaewon@meta.com>
Co-authored-by: Lu Fang <30275821+houseroad@users.noreply.github.com>
This commit is contained in:
Jaewon
2026-02-12 16:16:38 -08:00
committed by GitHub
parent aa181c923b
commit 4453ba8d9e
10 changed files with 117 additions and 37 deletions

View File

@@ -911,8 +911,8 @@ class AsyncLLM(EngineClient):
         if self.errored:
             raise self.dead_error
-    async def start_profile(self) -> None:
-        coros = [self.engine_core.profile_async(True)]
+    async def start_profile(self, profile_prefix: str | None = None) -> None:
+        coros = [self.engine_core.profile_async(True, profile_prefix)]
         if self.profiler is not None:
             coros.append(asyncio.to_thread(self.profiler.start))
         await asyncio.gather(*coros)

View File

@@ -568,8 +568,8 @@ class EngineCore:
         if self.scheduler:
             self.scheduler.shutdown()
-    def profile(self, is_start: bool = True):
-        self.model_executor.profile(is_start)
+    def profile(self, is_start: bool = True, profile_prefix: str | None = None):
+        self.model_executor.profile(is_start, profile_prefix)
     def reset_mm_cache(self):
         # NOTE: Since this is mainly for debugging, we don't attempt to

View File

@@ -135,7 +135,7 @@ class EngineCoreClient(ABC):
     def add_request(self, request: EngineCoreRequest) -> None:
         raise NotImplementedError
-    def profile(self, is_start: bool = True) -> None:
+    def profile(self, is_start: bool = True, profile_prefix: str | None = None) -> None:
         raise NotImplementedError
     def reset_mm_cache(self) -> None:
@@ -210,7 +210,9 @@ class EngineCoreClient(ABC):
     async def add_request_async(self, request: EngineCoreRequest) -> None:
         raise NotImplementedError
-    async def profile_async(self, is_start: bool = True) -> None:
+    async def profile_async(
+        self, is_start: bool = True, profile_prefix: str | None = None
+    ) -> None:
         raise NotImplementedError
     async def reset_mm_cache_async(self) -> None:
@@ -295,8 +297,8 @@ class InprocClient(EngineCoreClient):
     def shutdown(self) -> None:
         self.engine_core.shutdown()
-    def profile(self, is_start: bool = True) -> None:
-        self.engine_core.profile(is_start)
+    def profile(self, is_start: bool = True, profile_prefix: str | None = None) -> None:
+        self.engine_core.profile(is_start, profile_prefix)
     def reset_mm_cache(self) -> None:
         self.engine_core.reset_mm_cache()
@@ -765,8 +767,8 @@ class SyncMPClient(MPClient):
         if request_ids and not self.resources.engine_dead:
             self._send_input(EngineCoreRequestType.ABORT, request_ids)
-    def profile(self, is_start: bool = True) -> None:
-        self.call_utility("profile", is_start)
+    def profile(self, is_start: bool = True, profile_prefix: str | None = None) -> None:
+        self.call_utility("profile", is_start, profile_prefix)
     def reset_mm_cache(self) -> None:
         self.call_utility("reset_mm_cache")
@@ -987,8 +989,10 @@ class AsyncMPClient(MPClient):
         """Resume the scheduler after a pause."""
         await self.call_utility_async("resume_scheduler")
-    async def profile_async(self, is_start: bool = True) -> None:
-        await self.call_utility_async("profile", is_start)
+    async def profile_async(
+        self, is_start: bool = True, profile_prefix: str | None = None
+    ) -> None:
+        await self.call_utility_async("profile", is_start, profile_prefix)
     async def reset_mm_cache_async(self) -> None:
         await self.call_utility_async("reset_mm_cache")

View File

@@ -326,8 +326,8 @@ class LLMEngine:
         return processed_outputs.request_outputs
-    def start_profile(self):
-        self.engine_core.profile(True)
+    def start_profile(self, profile_prefix: str | None = None):
+        self.engine_core.profile(True, profile_prefix)
     def stop_profile(self):
         self.engine_core.profile(False)