[V1] EngineCore supports profiling (#10564)
Signed-off-by: Abatom <abzhonghua@gmail.com>
This commit is contained in:
@@ -6,6 +6,7 @@ from typing import TYPE_CHECKING, Optional, Tuple
|
||||
import torch
|
||||
import torch.distributed
|
||||
|
||||
import vllm.envs as envs
|
||||
from vllm.config import CacheConfig, ModelConfig, ParallelConfig, VllmConfig
|
||||
from vllm.distributed import (ensure_model_parallel_initialized,
|
||||
init_distributed_environment,
|
||||
@@ -56,6 +57,22 @@ class Worker:
|
||||
init_cached_hf_modules()
|
||||
|
||||
self.model_runner = GPUModelRunner(vllm_config)
|
||||
# Torch profiler. Enabled and configured through env vars:
|
||||
# VLLM_TORCH_PROFILER_DIR=/path/to/save/trace
|
||||
if envs.VLLM_TORCH_PROFILER_DIR:
|
||||
torch_profiler_trace_dir = envs.VLLM_TORCH_PROFILER_DIR
|
||||
logger.info("Profiling enabled. Traces will be saved to: %s",
|
||||
torch_profiler_trace_dir)
|
||||
self.profiler = torch.profiler.profile(
|
||||
activities=[
|
||||
torch.profiler.ProfilerActivity.CPU,
|
||||
torch.profiler.ProfilerActivity.CUDA,
|
||||
],
|
||||
with_stack=True,
|
||||
on_trace_ready=torch.profiler.tensorboard_trace_handler(
|
||||
torch_profiler_trace_dir, use_gzip=True))
|
||||
else:
|
||||
self.profiler = None
|
||||
|
||||
def initialize(self):
|
||||
if self.device_config.device.type == "cuda":
|
||||
@@ -184,6 +201,14 @@ class Worker:
|
||||
# TODO(woosuk): Send the output to the engine process.
|
||||
return output
|
||||
|
||||
def profile(self, is_start=True):
|
||||
if self.profiler is None:
|
||||
raise RuntimeError("Profiler is not enabled.")
|
||||
if is_start:
|
||||
self.profiler.start()
|
||||
else:
|
||||
self.profiler.stop()
|
||||
|
||||
|
||||
def init_worker_distributed_environment(
|
||||
parallel_config: ParallelConfig,
|
||||
|
||||
Reference in New Issue
Block a user