[Frontend][torch.compile] CompilationConfig Overhaul (#20283): name change compilation level to compilation mode, deprecation compilation level (#26355)

Signed-off-by: morrison-turnansky <mturnans@redhat.com> Signed-off-by: Morrison Turnansky <mturnans@redhat.com> Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
2025-10-14 22:51:16 -04:00
parent e66d787bce
commit 96b9aa5aa0
42 changed files with 270 additions and 248 deletions
--- a/vllm/platforms/cpu.py
+++ b/vllm/platforms/cpu.py
@@ -247,12 +247,12 @@ class CpuPlatform(Platform):
            parallel_config.enable_dbo = False

        # Note: workaround for v1 gpu_model_runner
-        from vllm.config import CompilationLevel
+        from vllm.config import CompilationMode

        vllm_config.compilation_config.cudagraph_capture_sizes = []

        compilation_config = vllm_config.compilation_config
-        if vllm_config.compilation_config.level == CompilationLevel.PIECEWISE:
+        if vllm_config.compilation_config.mode == CompilationMode.VLLM_COMPILE:
            # Note: vLLM V1 is using PIECEWISE level compilation, which will
            # take time to compile kernels just-in-time with the inductor
            # backend. For CPU CI tests, most of them are executed fast and
@@ -265,7 +265,7 @@ class CpuPlatform(Platform):
            else:
                backend = "inductor"

-            compilation_config.level = CompilationLevel.DYNAMO_ONCE
+            compilation_config.mode = CompilationMode.DYNAMO_TRACE_ONCE
            compilation_config.backend = backend
            compilation_config.inductor_compile_config.update(
                {
@@ -277,7 +277,7 @@ class CpuPlatform(Platform):
            )

        if vllm_config.lora_config is not None:
-            compilation_config.level = CompilationLevel.NO_COMPILATION
+            compilation_config.mode = CompilationMode.NONE

        assert vllm_config.device_config.device_type == "cpu"

--- a/vllm/platforms/tpu.py
+++ b/vllm/platforms/tpu.py
@@ -114,7 +114,7 @@ class TpuPlatform(Platform):

    @classmethod
    def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
-        from vllm.config import CompilationLevel, CUDAGraphMode
+        from vllm.config import CompilationMode, CUDAGraphMode

        cache_config = vllm_config.cache_config
        # For v0, the default block size is 16.
@@ -122,12 +122,13 @@ class TpuPlatform(Platform):
            cache_config.block_size = cast(BlockSize, 16)
        compilation_config = vllm_config.compilation_config

-        # TPU only supports DYNAMO_ONCE compilation level
-        if compilation_config.level != CompilationLevel.DYNAMO_ONCE:
+        # TPU only supports DYNAMO_TRACE_ONCE compilation mode
+        if compilation_config.mode != CompilationMode.DYNAMO_TRACE_ONCE:
            logger.info(
-                "[TPU] Forcing DYNAMO_ONCE compilation level, and disabling cudagraph."
+                "[TPU] Forcing DYNAMO_TRACE_ONCE compilation mode, and\
+                disabling cudagraph."
            )
-            compilation_config.level = CompilationLevel.DYNAMO_ONCE
+            compilation_config.mode = CompilationMode.DYNAMO_TRACE_ONCE

        if (
            compilation_config.cudagraph_mode is None
--- a/vllm/platforms/xpu.py
+++ b/vllm/platforms/xpu.py
@@ -144,7 +144,7 @@ class XPUPlatform(Platform):
            cache_config.block_size = 64

        # lazy import to avoid circular import
-        from vllm.config import CompilationLevel, CUDAGraphMode
+        from vllm.config import CompilationMode, CUDAGraphMode

        compilation_config = vllm_config.compilation_config
        if compilation_config.compile_sizes is None:
@@ -155,7 +155,7 @@ class XPUPlatform(Platform):
        )

        if vllm_config.lora_config is not None:
-            compilation_config.level = CompilationLevel.NO_COMPILATION
+            compilation_config.mode = CompilationMode.NONE

        # check and update parallel config
        parallel_config = vllm_config.parallel_config