[Core] Profiler improvements and lazy initialization (#33198)

Signed-off-by: Jaewon Lee <jaewon@meta.com>
Co-authored-by: Lu Fang <30275821+houseroad@users.noreply.github.com>
Author: Jaewon Lee
Date: 2026-02-12 16:16:38 -08:00 (committed by GitHub)
Parent: aa181c923b
Commit: 4453ba8d9e
10 changed files with 117 additions and 37 deletions


@@ -103,20 +103,14 @@ class Worker(WorkerBase):
         )
         # Torch/CUDA profiler. Enabled and configured through profiler_config.
+        # Profiler wrapper is created lazily in profile() when start is called,
+        # so we have all the information needed for proper trace naming.
         self.profiler: Any | None = None
-        profiler_config = vllm_config.profiler_config
-        if profiler_config.profiler == "torch":
-            worker_name = f"{vllm_config.instance_id}-rank-{self.rank}"
-            self.profiler = TorchProfilerWrapper(
-                profiler_config,
-                worker_name=worker_name,
-                local_rank=self.local_rank,
-                activities=["CPU", "CUDA"],
-            )
-        elif profiler_config.profiler == "cuda":
-            self.profiler = CudaProfilerWrapper(profiler_config)
-        else:
-            self.profiler = None
+        self.profiler_config = vllm_config.profiler_config
+        # Only validate the profiler config here; don't instantiate yet.
+        if self.profiler_config.profiler not in ("torch", "cuda", None):
+            raise ValueError(
+                f"Unknown profiler type: {self.profiler_config.profiler}")

         self.use_v2_model_runner = envs.VLLM_USE_V2_MODEL_RUNNER
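The hunk above is a validate-early, construct-late move: `__init__` now only checks that the configured profiler type is known, while the wrapper itself is built on first use, once the trace name can be computed. A minimal self-contained sketch of that pattern, with made-up class names (this is not vLLM's code):

```python
from typing import Any


class _StubProfiler:
    """Stand-in for TorchProfilerWrapper/CudaProfilerWrapper."""

    def __init__(self, trace_name: str) -> None:
        self.trace_name = trace_name

    def start(self) -> None:
        print(f"profiling -> {self.trace_name}")

    def stop(self) -> None:
        print("profiler stopped")


class LazyProfilerHolder:
    """Fail fast on bad config; defer construction to the first start()."""

    _VALID_TYPES = ("torch", "cuda", None)

    def __init__(self, profiler_type: str | None) -> None:
        if profiler_type not in self._VALID_TYPES:
            raise ValueError(f"Unknown profiler type: {profiler_type}")
        self.profiler_type = profiler_type
        self.profiler: Any | None = None

    def start(self, trace_name: str) -> None:
        if self.profiler is None and self.profiler_type is not None:
            # First start: the trace name is only known now.
            self.profiler = _StubProfiler(trace_name)
        if self.profiler is not None:
            # Restarts reuse the wrapper and its original trace name.
            self.profiler.start()

    def stop(self) -> None:
        if self.profiler is None:
            print("profiler was never started; nothing to stop")
            return
        self.profiler.stop()


holder = LazyProfilerHolder("torch")
holder.start("bench_rank_0")  # constructs and starts
holder.stop()
holder.start("ignored")       # reuses the wrapper; original name kept
```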
@@ -677,17 +671,52 @@ class Worker(WorkerBase):
     def take_draft_token_ids(self) -> DraftTokenIds | None:
         return self.model_runner.take_draft_token_ids()

-    def profile(self, is_start: bool = True):
-        if self.profiler is None:
+    def profile(self, is_start: bool = True, profile_prefix: str | None = None):
+        # Check if profiling is enabled.
+        if self.profiler_config is None or self.profiler_config.profiler is None:
             raise RuntimeError(
                 "Profiling is not enabled. Please set --profiler-config to enable "
                 "profiling. Example: "
                 "'--profiler-config.profiler=torch --profiler-config.torch_profiler_dir"
                 "=YOUR_DIR_PATH_TO_DUMP_TRACE'"
             )
         if is_start:
-            self.profiler.start()
+            # Generate the trace name by combining the prefix with the
+            # rank suffix.
+            from vllm.distributed.utils import get_worker_rank_suffix
+
+            rank_suffix = get_worker_rank_suffix(global_rank=self.rank)
+
+            # Build the full trace name.
+            if profile_prefix:
+                trace_name = f"{profile_prefix}_{rank_suffix}"
+            else:
+                trace_name = rank_suffix
+
+            # Create the profiler wrapper only on the first start call.
+            if self.profiler is None:
+                if self.profiler_config.profiler == "torch":
+                    self.profiler = TorchProfilerWrapper(
+                        self.profiler_config,
+                        worker_name=trace_name,
+                        local_rank=self.local_rank,
+                        activities=["CPU", "CUDA"],
+                    )
+                    logger.debug(
+                        "Starting torch profiler with trace name: %s", trace_name
+                    )
+                elif self.profiler_config.profiler == "cuda":
+                    self.profiler = CudaProfilerWrapper(self.profiler_config)
+                    logger.debug("Starting CUDA profiler")
+                self.profiler.start()
+            else:
+                # Profiler already initialized. Restart profiling but keep
+                # the original trace name from the first initialization.
+                self.profiler.start()
         else:
+            if self.profiler is None:
+                logger.warning("Profiler was not started, nothing to stop.")
+                return
             self.profiler.stop()

     def execute_dummy_batch(self) -> None:
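For the trace-naming rule itself, a tiny sketch of the logic `profile()` now applies. `get_worker_rank_suffix` is imported from `vllm.distributed.utils` per the diff, but its output format is not shown here, so the `rank_<n>` form below is purely an assumption for illustration:

```python
def build_trace_name(profile_prefix: str | None, global_rank: int) -> str:
    # Stand-in for get_worker_rank_suffix(global_rank=...); the real
    # helper's suffix format is an assumption.
    rank_suffix = f"rank_{global_rank}"
    return f"{profile_prefix}_{rank_suffix}" if profile_prefix else rank_suffix


assert build_trace_name("prefill", 3) == "prefill_rank_3"
assert build_trace_name(None, 3) == "rank_3"
```

Because the wrapper is created only on the first start call, a prefix passed to a later start has no effect on the trace name; the comment in the diff calls this out explicitly.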