[V1] EngineCore supports profiling (#10564)

Signed-off-by: Abatom <abzhonghua@gmail.com>
This commit is contained in:
Zhonghua Deng
2024-11-23 09:16:15 +08:00
committed by GitHub
parent 28598f3939
commit d345f409b7
5 changed files with 68 additions and 9 deletions

View File

@@ -6,6 +6,7 @@ from typing import TYPE_CHECKING, Optional, Tuple
import torch
import torch.distributed
import vllm.envs as envs
from vllm.config import CacheConfig, ModelConfig, ParallelConfig, VllmConfig
from vllm.distributed import (ensure_model_parallel_initialized,
init_distributed_environment,
@@ -56,6 +57,22 @@ class Worker:
init_cached_hf_modules()
self.model_runner = GPUModelRunner(vllm_config)
# Torch profiler. Enabled and configured through env vars:
# VLLM_TORCH_PROFILER_DIR=/path/to/save/trace
if envs.VLLM_TORCH_PROFILER_DIR:
torch_profiler_trace_dir = envs.VLLM_TORCH_PROFILER_DIR
logger.info("Profiling enabled. Traces will be saved to: %s",
torch_profiler_trace_dir)
self.profiler = torch.profiler.profile(
activities=[
torch.profiler.ProfilerActivity.CPU,
torch.profiler.ProfilerActivity.CUDA,
],
with_stack=True,
on_trace_ready=torch.profiler.tensorboard_trace_handler(
torch_profiler_trace_dir, use_gzip=True))
else:
self.profiler = None
def initialize(self):
if self.device_config.device.type == "cuda":
@@ -184,6 +201,14 @@ class Worker:
# TODO(woosuk): Send the output to the engine process.
return output
def profile(self, is_start=True):
    """Start or stop the torch profiler for this worker.

    Profiling must have been enabled at construction time via the
    VLLM_TORCH_PROFILER_DIR environment variable; otherwise
    ``self.profiler`` is None and this call is an error.

    Args:
        is_start: When True, begin capturing a trace; when False,
            stop the capture (which triggers the trace-ready handler).

    Raises:
        RuntimeError: If the profiler was never enabled.
    """
    if self.profiler is None:
        raise RuntimeError("Profiler is not enabled.")
    # Select the matching profiler transition and invoke it.
    action = self.profiler.start if is_start else self.profiler.stop
    action()
def init_worker_distributed_environment(
parallel_config: ParallelConfig,