[Core] Add torch profiler CPU traces for AsyncLLM. (#21794)

Signed-off-by: Chenheli Hua <huachenheli@outlook.com>
2025-08-19 19:32:47 -07:00
parent d46d417b58
commit e58c5a9768
2 changed files with 35 additions and 4 deletions
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -667,8 +667,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
    "VLLM_LORA_RESOLVER_CACHE_DIR":
    lambda: os.getenv("VLLM_LORA_RESOLVER_CACHE_DIR", None),
-    # Enables torch profiler if set. Path to the directory where torch profiler
+    # Enables torch profiler if set.
-    # traces are saved. Note that it must be an absolute path.
+    # Both AsyncLLM's CPU traces and the workers'
    # traces (CPU & GPU) will be saved under this directory.
    # Note that it must be an absolute path.
    "VLLM_TORCH_PROFILER_DIR":
    lambda: (None if os.getenv("VLLM_TORCH_PROFILER_DIR", None) is None else os
             .path.expanduser(os.getenv("VLLM_TORCH_PROFILER_DIR", "."))),
--- a/vllm/v1/engine/async_llm.py
+++ b/vllm/v1/engine/async_llm.py
@@ -1,12 +1,15 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import asyncio
 import os
 import socket
 import time
 from collections.abc import AsyncGenerator, Iterable, Mapping
 from copy import copy
 from typing import Any, Optional, Union
 import numpy as np
 import torch
 import vllm.envs as envs
 from vllm.config import ModelConfig, VllmConfig
@@ -144,6 +147,26 @@ class AsyncLLM(EngineClient):
        except RuntimeError:
            pass
        if envs.VLLM_TORCH_PROFILER_DIR:
            logger.info(
                "Torch profiler enabled. AsyncLLM CPU traces will be collected under %s",  # noqa: E501
                envs.VLLM_TORCH_PROFILER_DIR)
            worker_name = f"{socket.gethostname()}_{os.getpid()}.async_llm"
            self.profiler = torch.profiler.profile(
                activities=[
                    torch.profiler.ProfilerActivity.CPU,
                ],
                with_stack=envs.VLLM_TORCH_PROFILER_WITH_STACK,
                on_trace_ready=torch.profiler.tensorboard_trace_handler(
                    envs.VLLM_TORCH_PROFILER_DIR,
                    worker_name=worker_name,
                    use_gzip=True))
        else:
            logger.info(
                "Torch profiler disabled. AsyncLLM CPU traces will not be collected."  # noqa: E501
            )
            self.profiler = None
    @classmethod
    @deprecate_kwargs(
        "disable_log_requests",
@@ -562,10 +585,16 @@ class AsyncLLM(EngineClient):
            raise self.dead_error
    # Start profiling. Calls engine_core.profile_async(True) and, when a local
    # profiler object exists (self.profiler is not None), also invokes its
    # start() method. All collected awaitables are awaited together with
    # asyncio.gather, so an exception from any of them is raised to the caller.
    async def start_profile(self) -> None:
-        await self.engine_core.profile_async(True)
+        coros = [self.engine_core.profile_async(True)]
        if self.profiler is not None:
            # self.profiler.start is a plain (non-async) callable; wrapping it
            # in asyncio.to_thread runs it in a worker thread and yields an
            # awaitable that can be gathered with the engine-core request.
            coros.append(asyncio.to_thread(self.profiler.start))
        await asyncio.gather(*coros)
    # Stop profiling. Calls engine_core.profile_async(False) and, when a local
    # profiler object exists (self.profiler is not None), also invokes its
    # stop() method. All collected awaitables are awaited together with
    # asyncio.gather, so an exception from any of them is raised to the caller.
    async def stop_profile(self) -> None:
-        await self.engine_core.profile_async(False)
+        coros = [self.engine_core.profile_async(False)]
        if self.profiler is not None:
            # self.profiler.stop is a plain (non-async) callable; it is run in
            # a worker thread via asyncio.to_thread so it can be awaited here.
            # NOTE(review): presumably stop() is what triggers the
            # on_trace_ready handler that writes the trace — confirm against
            # the torch.profiler documentation.
            coros.append(asyncio.to_thread(self.profiler.stop))
        await asyncio.gather(*coros)
    async def reset_mm_cache(self) -> None:
        self.processor.mm_registry.reset_processor_cache(self.model_config)