[Frontend][torch.compile] CompilationConfig Overhaul (#20283): rename compilation level to compilation mode, deprecate compilation level (#26355)

Signed-off-by: morrison-turnansky <mturnans@redhat.com>
Signed-off-by: Morrison Turnansky <mturnans@redhat.com>
Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
This commit is contained in:
Morrison Turnansky
2025-10-14 22:51:16 -04:00
committed by GitHub
parent e66d787bce
commit 96b9aa5aa0
42 changed files with 270 additions and 248 deletions

View File

@@ -10,7 +10,7 @@ import pytest
from tests.utils import wait_for_gpu_memory_to_clear
from tests.v1.attention.utils import full_cg_backend_configs as backend_configs
from vllm import LLM
from vllm.config import CompilationConfig
from vllm.config import CompilationConfig, CompilationMode
from vllm.platforms import current_platform
@@ -73,7 +73,7 @@ def test_backend_and_cudagraph_mode_combo(backend_name, cudagraph_mode, supporte
gpu_memory_utilization=0.45,
max_model_len=1024,
compilation_config=CompilationConfig(
level=3, cudagraph_mode=cudagraph_mode
mode=CompilationMode.VLLM_COMPILE, cudagraph_mode=cudagraph_mode
),
)
llm.generate(["Hello, my name is"] * 10)
@@ -90,32 +90,27 @@ def test_backend_and_cudagraph_mode_combo(backend_name, cudagraph_mode, supporte
)
# test cudagraph_mode with different compilation level.
# (backend_name, cudagraph_mode, compilation_level, supported)
# test cudagraph_mode with different compilation mode.
# (backend_name, cudagraph_mode, compilation_mode, supported)
combo_cases_2 = [
("FA2", "FULL", 0, True), # no compilation + full cudagraph
("FA2", "FULL", 3, True), # piecewise compilation + full cudagraph
("FA2", "PIECEWISE", 0, False), # no compilation + piecewise cudagraph
("FA2", "PIECEWISE", 3, True), # piecewise compilation + piecewise cudagraph
(
"FA2",
"FULL_AND_PIECEWISE",
0,
False,
), # piecewise cudagraph not supported without piecewise compilation
("FA2", "FULL_AND_PIECEWISE", 3, True),
("FA2", "FULL_DECODE_ONLY", 0, True),
("FA2", "FULL_DECODE_ONLY", 3, True),
("FA2", "NONE", 0, True), # no compilation + no cudagraph
("FA2", "NONE", 3, True), # piecewise compilation + no cudagraph
("FA2", "FULL", CompilationMode.NONE, True),
("FA2", "FULL", CompilationMode.VLLM_COMPILE, True),
("FA2", "PIECEWISE", CompilationMode.NONE, False),
("FA2", "PIECEWISE", CompilationMode.VLLM_COMPILE, True),
("FA2", "FULL_AND_PIECEWISE", CompilationMode.NONE, False),
("FA2", "FULL_AND_PIECEWISE", CompilationMode.VLLM_COMPILE, True),
("FA2", "FULL_DECODE_ONLY", CompilationMode.NONE, True),
("FA2", "FULL_DECODE_ONLY", CompilationMode.VLLM_COMPILE, True),
("FA2", "NONE", CompilationMode.NONE, True),
("FA2", "NONE", CompilationMode.VLLM_COMPILE, True),
]
@pytest.mark.parametrize(
"backend_name,cudagraph_mode,compilation_level,supported", combo_cases_2
"backend_name,cudagraph_mode,compilation_mode,supported", combo_cases_2
)
def test_cudagraph_compilation_combo(combo_case):
backend_name, cudagraph_mode, compilation_level, supported = combo_case
backend_name, cudagraph_mode, compilation_mode, supported = combo_case
env_vars = backend_configs[backend_name].env_vars
@@ -130,7 +125,7 @@ def test_cudagraph_compilation_combo(combo_case):
gpu_memory_utilization=0.45,
max_model_len=1024,
compilation_config=CompilationConfig(
level=compilation_level, cudagraph_mode=cudagraph_mode
mode=compilation_mode, cudagraph_mode=cudagraph_mode
),
)
llm.generate(["Hello, my name is"] * 10)