[Hardware][TPU][Bugfix] Fix v1 mp profiler (#15409)
Signed-off-by: Siyuan Liu <lsiyuan@google.com>
This commit is contained in:
@@ -66,14 +66,18 @@ class TPUWorker:
|
|||||||
from vllm.utils import init_cached_hf_modules
|
from vllm.utils import init_cached_hf_modules
|
||||||
init_cached_hf_modules()
|
init_cached_hf_modules()
|
||||||
|
|
||||||
|
# Delay profiler initialization to the start of the profiling.
|
||||||
|
# This is because in vLLM V1, MP runtime is initialized before the
|
||||||
|
# TPU Worker is initialized. The profiler server needs to start after
|
||||||
|
# MP runtime is initialized.
|
||||||
self.profiler = None
|
self.profiler = None
|
||||||
|
self.profile_dir = None
|
||||||
if envs.VLLM_TORCH_PROFILER_DIR and self.rank < 1:
|
if envs.VLLM_TORCH_PROFILER_DIR and self.rank < 1:
|
||||||
# For TPU, we can only have 1 active profiler session for 1 profiler
|
# For TPU, we can only have 1 active profiler session for 1 profiler
|
||||||
# server. So we only profile on rank0.
|
# server. So we only profile on rank0.
|
||||||
self.profile_dir = envs.VLLM_TORCH_PROFILER_DIR
|
self.profile_dir = envs.VLLM_TORCH_PROFILER_DIR
|
||||||
logger.info("Profiling enabled. Traces will be saved to: %s",
|
logger.info("Profiling enabled. Traces will be saved to: %s",
|
||||||
self.profile_dir)
|
self.profile_dir)
|
||||||
self.profiler = xp.start_server(9012)
|
|
||||||
|
|
||||||
if self.model_config.seed is None:
|
if self.model_config.seed is None:
|
||||||
self.model_config.seed = 0
|
self.model_config.seed = 0
|
||||||
@@ -168,9 +172,11 @@ class TPUWorker:
|
|||||||
|
|
||||||
def profile(self, is_start: bool = True):
|
def profile(self, is_start: bool = True):
|
||||||
if self.rank < 1:
|
if self.rank < 1:
|
||||||
if self.profiler is None:
|
if self.profile_dir is None:
|
||||||
raise RuntimeError("Profiler is not enabled.")
|
raise RuntimeError("Profiler is not enabled.")
|
||||||
if is_start:
|
if is_start:
|
||||||
|
if self.profiler is None:
|
||||||
|
self.profiler = xp.start_server(9012)
|
||||||
xp.start_trace(self.profile_dir)
|
xp.start_trace(self.profile_dir)
|
||||||
else:
|
else:
|
||||||
xp.stop_trace()
|
xp.stop_trace()
|
||||||
|
|||||||
Reference in New Issue
Block a user