[Core] Profiler improvements and lazy initialization (#33198)

Signed-off-by: Jaewon Lee <jaewon@meta.com>
Co-authored-by: Lu Fang <30275821+houseroad@users.noreply.github.com>
2026-02-12 16:16:38 -08:00
parent aa181c923b
commit 4453ba8d9e
10 changed files with 117 additions and 37 deletions
--- a/vllm/v1/engine/async_llm.py
+++ b/vllm/v1/engine/async_llm.py
@@ -911,8 +911,8 @@ class AsyncLLM(EngineClient):
        if self.errored:
            raise self.dead_error

-    async def start_profile(self) -> None:
-        coros = [self.engine_core.profile_async(True)]
+    async def start_profile(self, profile_prefix: str | None = None) -> None:
+        coros = [self.engine_core.profile_async(True, profile_prefix)]
        if self.profiler is not None:
            coros.append(asyncio.to_thread(self.profiler.start))
        await asyncio.gather(*coros)
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@@ -568,8 +568,8 @@ class EngineCore:
        if self.scheduler:
            self.scheduler.shutdown()

-    def profile(self, is_start: bool = True):
-        self.model_executor.profile(is_start)
+    def profile(self, is_start: bool = True, profile_prefix: str | None = None):
+        self.model_executor.profile(is_start, profile_prefix)

    def reset_mm_cache(self):
        # NOTE: Since this is mainly for debugging, we don't attempt to
--- a/vllm/v1/engine/core_client.py
+++ b/vllm/v1/engine/core_client.py
@@ -135,7 +135,7 @@ class EngineCoreClient(ABC):
    def add_request(self, request: EngineCoreRequest) -> None:
        raise NotImplementedError

-    def profile(self, is_start: bool = True) -> None:
+    def profile(self, is_start: bool = True, profile_prefix: str | None = None) -> None:
        raise NotImplementedError

    def reset_mm_cache(self) -> None:
@@ -210,7 +210,9 @@ class EngineCoreClient(ABC):
    async def add_request_async(self, request: EngineCoreRequest) -> None:
        raise NotImplementedError

-    async def profile_async(self, is_start: bool = True) -> None:
+    async def profile_async(
+        self, is_start: bool = True, profile_prefix: str | None = None
+    ) -> None:
        raise NotImplementedError

    async def reset_mm_cache_async(self) -> None:
@@ -295,8 +297,8 @@ class InprocClient(EngineCoreClient):
    def shutdown(self) -> None:
        self.engine_core.shutdown()

-    def profile(self, is_start: bool = True) -> None:
-        self.engine_core.profile(is_start)
+    def profile(self, is_start: bool = True, profile_prefix: str | None = None) -> None:
+        self.engine_core.profile(is_start, profile_prefix)

    def reset_mm_cache(self) -> None:
        self.engine_core.reset_mm_cache()
@@ -765,8 +767,8 @@ class SyncMPClient(MPClient):
        if request_ids and not self.resources.engine_dead:
            self._send_input(EngineCoreRequestType.ABORT, request_ids)

-    def profile(self, is_start: bool = True) -> None:
-        self.call_utility("profile", is_start)
+    def profile(self, is_start: bool = True, profile_prefix: str | None = None) -> None:
+        self.call_utility("profile", is_start, profile_prefix)

    def reset_mm_cache(self) -> None:
        self.call_utility("reset_mm_cache")
@@ -987,8 +989,10 @@ class AsyncMPClient(MPClient):
        """Resume the scheduler after a pause."""
        await self.call_utility_async("resume_scheduler")

-    async def profile_async(self, is_start: bool = True) -> None:
-        await self.call_utility_async("profile", is_start)
+    async def profile_async(
+        self, is_start: bool = True, profile_prefix: str | None = None
+    ) -> None:
+        await self.call_utility_async("profile", is_start, profile_prefix)

    async def reset_mm_cache_async(self) -> None:
        await self.call_utility_async("reset_mm_cache")
--- a/vllm/v1/engine/llm_engine.py
+++ b/vllm/v1/engine/llm_engine.py
@@ -326,8 +326,8 @@ class LLMEngine:

        return processed_outputs.request_outputs

-    def start_profile(self):
-        self.engine_core.profile(True)
+    def start_profile(self, profile_prefix: str | None = None):
+        self.engine_core.profile(True, profile_prefix)

    def stop_profile(self):
        self.engine_core.profile(False)