[Core] Add torch profiler CPU traces for AsyncLLM. (#21794)
Signed-off-by: Chenheli Hua <huachenheli@outlook.com>
This commit is contained in:
@@ -667,8 +667,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
|
|||||||
"VLLM_LORA_RESOLVER_CACHE_DIR":
|
"VLLM_LORA_RESOLVER_CACHE_DIR":
|
||||||
lambda: os.getenv("VLLM_LORA_RESOLVER_CACHE_DIR", None),
|
lambda: os.getenv("VLLM_LORA_RESOLVER_CACHE_DIR", None),
|
||||||
|
|
||||||
# Enables torch profiler if set. Path to the directory where torch profiler
|
# Enables torch profiler if set.
|
||||||
# traces are saved. Note that it must be an absolute path.
|
# Both AsyncLLM's CPU traces as well as workers'
|
||||||
|
# traces (CPU & GPU) will be saved under this directory.
|
||||||
|
# Note that it must be an absolute path.
|
||||||
"VLLM_TORCH_PROFILER_DIR":
|
"VLLM_TORCH_PROFILER_DIR":
|
||||||
lambda: (None if os.getenv("VLLM_TORCH_PROFILER_DIR", None) is None else os
|
lambda: (None if os.getenv("VLLM_TORCH_PROFILER_DIR", None) is None else os
|
||||||
.path.expanduser(os.getenv("VLLM_TORCH_PROFILER_DIR", "."))),
|
.path.expanduser(os.getenv("VLLM_TORCH_PROFILER_DIR", "."))),
|
||||||
|
|||||||
@@ -1,12 +1,15 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import os
|
||||||
|
import socket
|
||||||
import time
|
import time
|
||||||
from collections.abc import AsyncGenerator, Iterable, Mapping
|
from collections.abc import AsyncGenerator, Iterable, Mapping
|
||||||
from copy import copy
|
from copy import copy
|
||||||
from typing import Any, Optional, Union
|
from typing import Any, Optional, Union
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
|
||||||
import vllm.envs as envs
|
import vllm.envs as envs
|
||||||
from vllm.config import ModelConfig, VllmConfig
|
from vllm.config import ModelConfig, VllmConfig
|
||||||
@@ -144,6 +147,26 @@ class AsyncLLM(EngineClient):
|
|||||||
except RuntimeError:
|
except RuntimeError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
if envs.VLLM_TORCH_PROFILER_DIR:
|
||||||
|
logger.info(
|
||||||
|
"Torch profiler enabled. AsyncLLM CPU traces will be collected under %s", # noqa: E501
|
||||||
|
envs.VLLM_TORCH_PROFILER_DIR)
|
||||||
|
worker_name = f"{socket.gethostname()}_{os.getpid()}.async_llm"
|
||||||
|
self.profiler = torch.profiler.profile(
|
||||||
|
activities=[
|
||||||
|
torch.profiler.ProfilerActivity.CPU,
|
||||||
|
],
|
||||||
|
with_stack=envs.VLLM_TORCH_PROFILER_WITH_STACK,
|
||||||
|
on_trace_ready=torch.profiler.tensorboard_trace_handler(
|
||||||
|
envs.VLLM_TORCH_PROFILER_DIR,
|
||||||
|
worker_name=worker_name,
|
||||||
|
use_gzip=True))
|
||||||
|
else:
|
||||||
|
logger.info(
|
||||||
|
"Torch profiler disabled. AsyncLLM CPU traces will not be collected." # noqa: E501
|
||||||
|
)
|
||||||
|
self.profiler = None
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@deprecate_kwargs(
|
@deprecate_kwargs(
|
||||||
"disable_log_requests",
|
"disable_log_requests",
|
||||||
@@ -562,10 +585,16 @@ class AsyncLLM(EngineClient):
|
|||||||
raise self.dead_error
|
raise self.dead_error
|
||||||
|
|
||||||
async def start_profile(self) -> None:
|
async def start_profile(self) -> None:
|
||||||
await self.engine_core.profile_async(True)
|
coros = [self.engine_core.profile_async(True)]
|
||||||
|
if self.profiler is not None:
|
||||||
|
coros.append(asyncio.to_thread(self.profiler.start))
|
||||||
|
await asyncio.gather(*coros)
|
||||||
|
|
||||||
async def stop_profile(self) -> None:
|
async def stop_profile(self) -> None:
|
||||||
await self.engine_core.profile_async(False)
|
coros = [self.engine_core.profile_async(False)]
|
||||||
|
if self.profiler is not None:
|
||||||
|
coros.append(asyncio.to_thread(self.profiler.stop))
|
||||||
|
await asyncio.gather(*coros)
|
||||||
|
|
||||||
async def reset_mm_cache(self) -> None:
|
async def reset_mm_cache(self) -> None:
|
||||||
self.processor.mm_registry.reset_processor_cache(self.model_config)
|
self.processor.mm_registry.reset_processor_cache(self.model_config)
|
||||||
|
|||||||
Reference in New Issue
Block a user