[Frontend][torch.compile] CompilationConfig Overhaul (#20283): Set up -O infrastructure (#26847)

Signed-off-by: morrison-turnansky <mturnans@redhat.com>
Signed-off-by: adabeyta <aabeyta@redhat.com>
Signed-off-by: Morrison Turnansky <mturnans@redhat.com>
Co-authored-by: adabeyta <aabeyta@redhat.com>
Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-11-27 04:55:58 -05:00
parent 00d3310d2d
commit 0838b52e2e
13 changed files with 735 additions and 64 deletions
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -77,6 +77,7 @@ from vllm.config.observability import DetailedTraceModules
 from vllm.config.parallel import DistributedExecutorBackend, ExpertPlacementStrategy
 from vllm.config.scheduler import SchedulerPolicy
 from vllm.config.utils import get_field
+from vllm.config.vllm import OptimizationLevel
 from vllm.logger import init_logger, suppress_logging
 from vllm.platforms import CpuArchEnum, current_platform
 from vllm.plugins import load_general_plugins
@@ -560,6 +561,7 @@ class EngineArgs:
    stream_interval: int = SchedulerConfig.stream_interval

    kv_sharing_fast_prefill: bool = CacheConfig.kv_sharing_fast_prefill
+    optimization_level: OptimizationLevel = VllmConfig.optimization_level

    kv_offloading_size: float | None = CacheConfig.kv_offloading_size
    kv_offloading_backend: KVOffloadingBackend | None = (
@@ -1114,6 +1116,10 @@ class EngineArgs:
            "--structured-outputs-config", **vllm_kwargs["structured_outputs_config"]
        )

+        vllm_group.add_argument(
+            "--optimization-level", **vllm_kwargs["optimization_level"]
+        )
+
        # Other arguments
        parser.add_argument(
            "--disable-log-stats",
@@ -1733,7 +1739,6 @@ class EngineArgs:
            compilation_config.max_cudagraph_capture_size = (
                self.max_cudagraph_capture_size
            )
-
        config = VllmConfig(
            model_config=model_config,
            cache_config=cache_config,
@@ -1750,6 +1755,7 @@ class EngineArgs:
            kv_events_config=self.kv_events_config,
            ec_transfer_config=self.ec_transfer_config,
            additional_config=self.additional_config,
+            optimization_level=self.optimization_level,
        )

        return config