[Core] Add torch profiler CPU traces for AsyncLLM. (#21794)

Signed-off-by: Chenheli Hua <huachenheli@outlook.com>
This commit is contained in:
Chenheli Hua
2025-08-19 19:32:47 -07:00
committed by GitHub
parent d46d417b58
commit e58c5a9768
2 changed files with 35 additions and 4 deletions

View File

@@ -667,8 +667,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_LORA_RESOLVER_CACHE_DIR": "VLLM_LORA_RESOLVER_CACHE_DIR":
lambda: os.getenv("VLLM_LORA_RESOLVER_CACHE_DIR", None), lambda: os.getenv("VLLM_LORA_RESOLVER_CACHE_DIR", None),
# Enables torch profiler if set. Path to the directory where torch profiler # Enables torch profiler if set.
# traces are saved. Note that it must be an absolute path. # Both AsyncLLM's CPU traces as well as workers'
# traces (CPU & GPU) will be saved under this directory.
# Note that it must be an absolute path.
"VLLM_TORCH_PROFILER_DIR": "VLLM_TORCH_PROFILER_DIR":
lambda: (None if os.getenv("VLLM_TORCH_PROFILER_DIR", None) is None else os lambda: (None if os.getenv("VLLM_TORCH_PROFILER_DIR", None) is None else os
.path.expanduser(os.getenv("VLLM_TORCH_PROFILER_DIR", "."))), .path.expanduser(os.getenv("VLLM_TORCH_PROFILER_DIR", "."))),

View File

@@ -1,12 +1,15 @@
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import asyncio import asyncio
import os
import socket
import time import time
from collections.abc import AsyncGenerator, Iterable, Mapping from collections.abc import AsyncGenerator, Iterable, Mapping
from copy import copy from copy import copy
from typing import Any, Optional, Union from typing import Any, Optional, Union
import numpy as np import numpy as np
import torch
import vllm.envs as envs import vllm.envs as envs
from vllm.config import ModelConfig, VllmConfig from vllm.config import ModelConfig, VllmConfig
@@ -144,6 +147,26 @@ class AsyncLLM(EngineClient):
except RuntimeError: except RuntimeError:
pass pass
if envs.VLLM_TORCH_PROFILER_DIR:
logger.info(
"Torch profiler enabled. AsyncLLM CPU traces will be collected under %s", # noqa: E501
envs.VLLM_TORCH_PROFILER_DIR)
worker_name = f"{socket.gethostname()}_{os.getpid()}.async_llm"
self.profiler = torch.profiler.profile(
activities=[
torch.profiler.ProfilerActivity.CPU,
],
with_stack=envs.VLLM_TORCH_PROFILER_WITH_STACK,
on_trace_ready=torch.profiler.tensorboard_trace_handler(
envs.VLLM_TORCH_PROFILER_DIR,
worker_name=worker_name,
use_gzip=True))
else:
logger.info(
"Torch profiler disabled. AsyncLLM CPU traces will not be collected." # noqa: E501
)
self.profiler = None
@classmethod @classmethod
@deprecate_kwargs( @deprecate_kwargs(
"disable_log_requests", "disable_log_requests",
@@ -562,10 +585,16 @@ class AsyncLLM(EngineClient):
raise self.dead_error raise self.dead_error
async def start_profile(self) -> None:
    """Start profiling.

    Always asks the engine-core workers to start their profilers via
    ``profile_async(True)``. When a local torch profiler was configured
    on this object (``self.profiler`` is not ``None`` — set when
    ``VLLM_TORCH_PROFILER_DIR`` is enabled), its CPU trace collection is
    started as well.

    ``torch.profiler.profile.start()`` is a blocking call, so it is run
    on a worker thread via ``asyncio.to_thread`` and awaited together
    with the engine-core RPC.
    """
    coros = [self.engine_core.profile_async(True)]
    if self.profiler is not None:
        # Blocking torch call — keep it off the event loop.
        coros.append(asyncio.to_thread(self.profiler.start))
    await asyncio.gather(*coros)
async def stop_profile(self) -> None:
    """Stop profiling.

    Mirror of ``start_profile``: asks the engine-core workers to stop
    via ``profile_async(False)`` and, when a local torch profiler is
    configured (``self.profiler`` is not ``None``), stops it too.

    ``torch.profiler.profile.stop()`` blocks (it also triggers the
    ``on_trace_ready`` handler that writes the trace file), so it is run
    on a worker thread via ``asyncio.to_thread``.
    """
    coros = [self.engine_core.profile_async(False)]
    if self.profiler is not None:
        # Blocking torch call (flushes the trace) — keep it off the loop.
        coros.append(asyncio.to_thread(self.profiler.stop))
    await asyncio.gather(*coros)
async def reset_mm_cache(self) -> None: async def reset_mm_cache(self) -> None:
self.processor.mm_registry.reset_processor_cache(self.model_config) self.processor.mm_registry.reset_processor_cache(self.model_config)