[Frontend][torch.compile] CompilationConfig Overhaul (#20283): rename compilation level to compilation mode, deprecate compilation level (#26355)

Signed-off-by: morrison-turnansky <mturnans@redhat.com>
Signed-off-by: Morrison Turnansky <mturnans@redhat.com>
Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
Morrison Turnansky authored on 2025-10-14 22:51:16 -04:00; committed by GitHub
parent e66d787bce, commit 96b9aa5aa0
42 changed files with 270 additions and 248 deletions
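For orientation, a minimal before/after sketch of the rename (this snippet is not part of the commit's diff; the new names VLLM_COMPILE, DYNAMO_TRACE_ONCE, and NONE are taken from the hunks below, while the exact deprecation behavior of the old field is inferred from the commit title):

    from vllm.config import CompilationConfig, CompilationMode

    # New spelling introduced by this commit:
    cfg = CompilationConfig(mode=CompilationMode.VLLM_COMPILE)

    # Old spelling, deprecated by this commit rather than removed:
    #   CompilationConfig(level=CompilationLevel.PIECEWISE)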

vllm/platforms/cpu.py

@@ -247,12 +247,12 @@ class CpuPlatform(Platform):
         parallel_config.enable_dbo = False
         # Note: workaround for v1 gpu_model_runner
-        from vllm.config import CompilationLevel
+        from vllm.config import CompilationMode
         vllm_config.compilation_config.cudagraph_capture_sizes = []
         compilation_config = vllm_config.compilation_config
-        if vllm_config.compilation_config.level == CompilationLevel.PIECEWISE:
+        if vllm_config.compilation_config.mode == CompilationMode.VLLM_COMPILE:
             # Note: vLLM V1 is using PIECEWISE level compilation, which will
             # take time to compile kernels just-in-time with the inductor
             # backend. For CPU CI tests, most of them are executed fast and
@@ -265,7 +265,7 @@ class CpuPlatform(Platform):
             else:
                 backend = "inductor"
-            compilation_config.level = CompilationLevel.DYNAMO_ONCE
+            compilation_config.mode = CompilationMode.DYNAMO_TRACE_ONCE
             compilation_config.backend = backend
             compilation_config.inductor_compile_config.update(
                 {
@@ -277,7 +277,7 @@ class CpuPlatform(Platform):
             )
         if vllm_config.lora_config is not None:
-            compilation_config.level = CompilationLevel.NO_COMPILATION
+            compilation_config.mode = CompilationMode.NONE
         assert vllm_config.device_config.device_type == "cpu"
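Condensed, the CPU hunks above amount to the following policy (a sketch, not the full method: `has_lora` stands in for the `vllm_config.lora_config is not None` check, and the eager-vs-inductor backend selection from the elided context is simplified to inductor):

    from vllm.config import CompilationMode

    def cpu_compilation_policy(compilation_config, has_lora: bool) -> None:
        # CPU downgrades VLLM_COMPILE to DYNAMO_TRACE_ONCE so CI does not
        # pay the inductor just-in-time compile cost.
        if compilation_config.mode == CompilationMode.VLLM_COMPILE:
            compilation_config.mode = CompilationMode.DYNAMO_TRACE_ONCE
            compilation_config.backend = "inductor"  # backend choice simplified
        # LoRA on CPU turns compilation off entirely.
        if has_lora:
            compilation_config.mode = CompilationMode.NONE

For example, calling cpu_compilation_policy(cfg, has_lora=True) leaves cfg.mode == CompilationMode.NONE regardless of the requested mode.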

vllm/platforms/tpu.py

@@ -114,7 +114,7 @@ class TpuPlatform(Platform):
     @classmethod
     def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
-        from vllm.config import CompilationLevel, CUDAGraphMode
+        from vllm.config import CompilationMode, CUDAGraphMode
         cache_config = vllm_config.cache_config
         # For v0, the default block size is 16.
@@ -122,12 +122,13 @@ class TpuPlatform(Platform):
             cache_config.block_size = cast(BlockSize, 16)
         compilation_config = vllm_config.compilation_config
-        # TPU only supports DYNAMO_ONCE compilation level
-        if compilation_config.level != CompilationLevel.DYNAMO_ONCE:
+        # TPU only supports DYNAMO_TRACE_ONCE compilation mode
+        if compilation_config.mode != CompilationMode.DYNAMO_TRACE_ONCE:
             logger.info(
-                "[TPU] Forcing DYNAMO_ONCE compilation level, and disabling cudagraph."
+                "[TPU] Forcing DYNAMO_TRACE_ONCE compilation mode, and\
+                disabling cudagraph."
             )
-            compilation_config.level = CompilationLevel.DYNAMO_ONCE
+            compilation_config.mode = CompilationMode.DYNAMO_TRACE_ONCE
         if (
             compilation_config.cudagraph_mode is None

vllm/platforms/xpu.py

@@ -144,7 +144,7 @@ class XPUPlatform(Platform):
             cache_config.block_size = 64
         # lazy import to avoid circular import
-        from vllm.config import CompilationLevel, CUDAGraphMode
+        from vllm.config import CompilationMode, CUDAGraphMode
         compilation_config = vllm_config.compilation_config
         if compilation_config.compile_sizes is None:
@@ -155,7 +155,7 @@ class XPUPlatform(Platform):
         )
         if vllm_config.lora_config is not None:
-            compilation_config.level = CompilationLevel.NO_COMPILATION
+            compilation_config.mode = CompilationMode.NONE
         # check and update parallel config
         parallel_config = vllm_config.parallel_config
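End to end, selecting the renamed mode at engine construction might look like the sketch below (a usage example, not taken from this diff; it assumes LLM forwards a compilation_config keyword as in current vLLM, and the model name is arbitrary):

    from vllm import LLM
    from vllm.config import CompilationConfig, CompilationMode

    # Request Dynamo trace-once compilation via the new `mode` field;
    # platform hooks like the ones in this commit may still override it.
    llm = LLM(
        model="facebook/opt-125m",
        compilation_config=CompilationConfig(
            mode=CompilationMode.DYNAMO_TRACE_ONCE,
        ),
    )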