[Cleanup] Refactor profiling env vars into a CLI config (#29912)

Signed-off-by: Benjamin Chislett <bchislett@nvidia.com>
Signed-off-by: Benjamin Chislett <chislett.ben@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-12-09 13:29:33 -05:00
parent d471b2aff0
commit e858bfe051
22 changed files with 433 additions and 252 deletions
--- a/examples/offline_inference/simple_profiling.py
+++ b/examples/offline_inference/simple_profiling.py
@@ -1,14 +1,10 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

-import os
 import time

 from vllm import LLM, SamplingParams

-# enable torch profiler, can also be set on cmd line
-os.environ["VLLM_TORCH_PROFILER_DIR"] = "./vllm_profile"
-
 # Sample prompts.
 prompts = [
    "Hello, my name is",
@@ -22,7 +18,14 @@ sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

 def main():
    # Create an LLM.
-    llm = LLM(model="facebook/opt-125m", tensor_parallel_size=1)
+    llm = LLM(
+        model="facebook/opt-125m",
+        tensor_parallel_size=1,
+        profiler_config={
+            "profiler": "torch",
+            "torch_profiler_dir": "./vllm_profile",
+        },
+    )

    llm.start_profile()