[Core] Profiler improvements and lazy initialization (#33198)
Signed-off-by: Jaewon Lee <jaewon@meta.com>
Co-authored-by: Lu Fang <30275821+houseroad@users.noreply.github.com>
@@ -103,20 +103,14 @@ class Worker(WorkerBase):
         )
 
         # Torch/CUDA profiler. Enabled and configured through profiler_config.
-        profiler_config = vllm_config.profiler_config
-        if profiler_config.profiler == "torch":
-            worker_name = f"{vllm_config.instance_id}-rank-{self.rank}"
-            self.profiler = TorchProfilerWrapper(
-                profiler_config,
-                worker_name=worker_name,
-                local_rank=self.local_rank,
-                activities=["CPU", "CUDA"],
-            )
-        elif profiler_config.profiler == "cuda":
-            self.profiler = CudaProfilerWrapper(profiler_config)
-        else:
-            self.profiler = None
+        # Profiler wrapper is created lazily in profile() when start is called,
+        # so we have all the information needed for proper trace naming.
+        self.profiler: Any | None = None
+        self.profiler_config = vllm_config.profiler_config
+
+        # Only validate the profiler config here; don't instantiate yet.
+        if self.profiler_config.profiler not in ("torch", "cuda", None):
+            raise ValueError(f"Unknown profiler type: {self.profiler_config.profiler}")
 
         self.use_v2_model_runner = envs.VLLM_USE_V2_MODEL_RUNNER
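The hunk above validates the profiler type eagerly but defers constructing the wrapper until profile() runs, when the trace-name inputs are known. A minimal, self-contained sketch of this validate-eagerly, construct-lazily pattern follows; DemoWorker and FakeProfiler are illustrative stand-ins, not vLLM APIs.

    # Standalone sketch of the pattern this commit adopts. All names here
    # are hypothetical; only the control flow mirrors the diff above.
    class FakeProfiler:
        def __init__(self, trace_name: str) -> None:
            self.trace_name = trace_name

        def start(self) -> None:
            print(f"start profiling -> {self.trace_name}")

        def stop(self) -> None:
            print(f"stop profiling -> {self.trace_name}")

    class DemoWorker:
        def __init__(self, profiler_type: str | None) -> None:
            # Fail fast on a bad config, but do not build the wrapper yet:
            # the trace name depends on arguments only known at profile() time.
            if profiler_type not in ("torch", "cuda", None):
                raise ValueError(f"Unknown profiler type: {profiler_type}")
            self.profiler_type = profiler_type
            self.profiler: FakeProfiler | None = None

        def profile(self, is_start: bool = True, profile_prefix: str | None = None) -> None:
            if self.profiler_type is None:
                raise RuntimeError("Profiling is not enabled.")
            if is_start:
                if self.profiler is None:
                    # First start call: the prefix is now known, so name the trace.
                    self.profiler = FakeProfiler(profile_prefix or "rank-0")
                self.profiler.start()
            elif self.profiler is None:
                print("Profiler was not started, nothing to stop.")
            else:
                self.profiler.stop()

    worker = DemoWorker("torch")
    worker.profile(is_start=True, profile_prefix="prefill")
    worker.profile(is_start=False)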
@@ -677,17 +671,52 @@ class Worker(WorkerBase):
     def take_draft_token_ids(self) -> DraftTokenIds | None:
         return self.model_runner.take_draft_token_ids()
 
-    def profile(self, is_start: bool = True):
-        if self.profiler is None:
+    def profile(self, is_start: bool = True, profile_prefix: str | None = None):
+        # Check whether profiling is enabled.
+        if self.profiler_config is None or self.profiler_config.profiler is None:
             raise RuntimeError(
                 "Profiling is not enabled. Please set --profiler-config to enable "
                 "profiling. Example: "
                 "'--profiler-config.profiler=torch --profiler-config.torch_profiler_dir"
                 "=YOUR_DIR_PATH_TO_DUMP_TRACE'"
             )
+
         if is_start:
-            self.profiler.start()
+            # Generate the trace name by combining the prefix with a comprehensive rank suffix.
+            from vllm.distributed.utils import get_worker_rank_suffix
+
+            rank_suffix = get_worker_rank_suffix(global_rank=self.rank)
+
+            # Build the full trace name.
+            if profile_prefix:
+                trace_name = f"{profile_prefix}_{rank_suffix}"
+            else:
+                trace_name = rank_suffix
+
+            # Create the profiler wrapper only on the first start call.
+            if self.profiler is None:
+                if self.profiler_config.profiler == "torch":
+                    self.profiler = TorchProfilerWrapper(
+                        self.profiler_config,
+                        worker_name=trace_name,
+                        local_rank=self.local_rank,
+                        activities=["CPU", "CUDA"],
+                    )
+                    logger.debug(
+                        "Starting torch profiler with trace name: %s", trace_name
+                    )
+                elif self.profiler_config.profiler == "cuda":
+                    self.profiler = CudaProfilerWrapper(self.profiler_config)
+                    logger.debug("Starting CUDA profiler")
+                self.profiler.start()
+            else:
+                # Profiler already initialized. Restart profiling but keep
+                # the original trace name from the first initialization.
+                self.profiler.start()
         else:
+            if self.profiler is None:
+                logger.warning("Profiler was not started, nothing to stop.")
+                return
             self.profiler.stop()
 
     def execute_dummy_batch(self) -> None:
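For callers, the visible change is the optional profile_prefix argument and the wrapper being created on the first start call. A hedged usage sketch, assuming an already-constructed Worker instance named worker whose profiler_config.profiler is set to "torch":

    # Hypothetical driver-side calls against the new signature; `worker` is
    # assumed to be a fully initialized Worker with profiling enabled.
    worker.profile(is_start=True, profile_prefix="decode_step")  # creates wrapper, starts
    worker.profile(is_start=False)                               # stops the trace
    # Later start/stop cycles reuse the wrapper and the trace name chosen above:
    worker.profile(is_start=True)
    worker.profile(is_start=False)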