[FrontEnd] UNREVERT CompilationConfig overhaul (#20283): deprecate use_inductor in favor of backend, simplify custom_ops (#26502)
Signed-off-by: morrison-turnansky <mturnans@redhat.com> Signed-off-by: Morrison Turnansky <mturnans@redhat.com> Signed-off-by: Luka Govedič <ProExpertProg@users.noreply.github.com> Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com> Co-authored-by: Jiangyun Zhu <riverclouds.zhu@qq.com>
This commit is contained in:
committed by
GitHub
parent
7200a21cd1
commit
e3fdb627d9
@@ -15,6 +15,7 @@ from pydantic.dataclasses import dataclass
|
||||
from vllm.compilation.inductor_pass import CallableInductorPass, InductorPass
|
||||
from vllm.config.utils import config
|
||||
from vllm.logger import init_logger
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.utils import is_torch_equal_or_newer, resolve_obj_by_qualname
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -187,7 +188,8 @@ class CompilationConfig:
|
||||
backend: str = ""
|
||||
"""The backend for compilation. It needs to be a string:
|
||||
|
||||
- "" (empty string): use the default backend.
|
||||
- "" (empty string): use the default backend ("inductor" on CUDA-alike
|
||||
platforms).
|
||||
- "eager"/"openxla"/...: use the specified backend registered in PyTorch.
|
||||
- "full.module.name": a qualified name which can be used to import the
|
||||
|
||||
@@ -196,7 +198,12 @@ class CompilationConfig:
|
||||
distributed setting. When the compilation level is 1 or 2, the backend is
|
||||
used for the compilation directly (it sees the whole graph). When the
|
||||
compilation level is 3, the backend is used for the piecewise compilation
|
||||
(it sees a part of the graph)."""
|
||||
(it sees a part of the graph). The backend can not be custom for compilation
|
||||
level 3, i.e. the backend must be either eager or inductor. Furthermore,
|
||||
compilation is only piecewise if splitting ops is set accordingly and
|
||||
use_inductor_cudagraphs_partition is off. Note that the default options for
|
||||
splitting ops are sufficient for piecewise compilation.
|
||||
"""
|
||||
custom_ops: list[str] = field(default_factory=list)
|
||||
"""Fine-grained control over which custom ops to enable/disable. Use 'all'
|
||||
to enable all, 'none' to disable all. Also specify a list of custom op
|
||||
@@ -229,8 +236,12 @@ class CompilationConfig:
|
||||
If empty list [], no ops are excluded (suitable for full cudagraphs)."""
|
||||
|
||||
# Inductor capture
|
||||
use_inductor: bool = True
|
||||
"""Whether to use inductor compilation:
|
||||
use_inductor: bool | None = None
|
||||
"""
|
||||
Whether to use inductor compilation.
|
||||
|
||||
This flag is deprecated and will be removed in the next release 0.12.0.
|
||||
Please use the 'backend' option instead.
|
||||
|
||||
- False: inductor compilation is not used. graph runs in eager
|
||||
(custom_ops enabled by default).
|
||||
@@ -238,7 +249,11 @@ class CompilationConfig:
|
||||
One graph for symbolic shape and one graph per size in compile_sizes
|
||||
are compiled using configurations in inductor_compile_config.
|
||||
|
||||
This setting is ignored if level<PIECEWISE."""
|
||||
This setting is ignored if level<PIECEWISE.
|
||||
|
||||
For future compatibility:
|
||||
If use_inductor is True, backend="inductor" otherwise backend="eager".
|
||||
"""
|
||||
compile_sizes: list[int | str] | None = None
|
||||
"""Sizes to compile for inductor. In addition
|
||||
to integers, it also supports "cudagraph_capture_sizes" to
|
||||
@@ -545,7 +560,43 @@ class CompilationConfig:
|
||||
"(where 'op' is the registered op name)"
|
||||
)
|
||||
|
||||
# Currently only eager and inductor backend are supported.
|
||||
# for piecewise compilation. Custom backends are not suppported for
|
||||
# piecewise compilation. Update when more backends are supported.
|
||||
if self.level == CompilationLevel.PIECEWISE and self.backend not in [
|
||||
"",
|
||||
"eager",
|
||||
"inductor",
|
||||
]:
|
||||
raise ValueError(
|
||||
f"Invalid backend for piecewise compilation: {self.backend}"
|
||||
)
|
||||
|
||||
if self.use_inductor is not None:
|
||||
logger.warning_once(
|
||||
"The 'use_inductor' flag is deprecated and will be "
|
||||
"removed in the next release (v0.12.0). "
|
||||
"Please use the 'backend' option instead.",
|
||||
)
|
||||
self.backend = "inductor" if self.use_inductor else "eager"
|
||||
|
||||
if self.backend == "":
|
||||
self.backend = current_platform.simple_compile_backend
|
||||
|
||||
def init_backend(self, vllm_config: "VllmConfig") -> str | Callable:
|
||||
"""
|
||||
Initialize the backend for the compilation config from a vllm config.
|
||||
Arguments:
|
||||
vllm_config: The vllm config to initialize the backend from.
|
||||
Returns:
|
||||
The backend for the compilation config.
|
||||
"""
|
||||
if self.level is None:
|
||||
raise ValueError(
|
||||
"No compilation level is set. This method should only be \
|
||||
called via vllm config where the level is set if none is \
|
||||
provided."
|
||||
)
|
||||
if self.level == CompilationLevel.NO_COMPILATION:
|
||||
raise ValueError("No compilation level is set.")
|
||||
|
||||
@@ -553,15 +604,15 @@ class CompilationConfig:
|
||||
|
||||
torch_backends = list_backends(exclude_tags=tuple())
|
||||
if self.level in [CompilationLevel.DYNAMO_AS_IS, CompilationLevel.DYNAMO_ONCE]:
|
||||
if self.backend == "":
|
||||
return "eager"
|
||||
if self.backend in torch_backends:
|
||||
return self.backend
|
||||
return resolve_obj_by_qualname(self.backend)
|
||||
|
||||
# TODO: pass user-specified backend to piecewise compilation
|
||||
# merge with the config use_inductor
|
||||
assert self.level == CompilationLevel.PIECEWISE
|
||||
if self.backend not in ["eager", "inductor"]:
|
||||
raise ValueError(
|
||||
f"Invalid backend for piecewise compilation: {self.backend}"
|
||||
)
|
||||
|
||||
from vllm.compilation.backends import VllmBackend
|
||||
|
||||
@@ -710,7 +761,9 @@ class CompilationConfig:
|
||||
return self.level == CompilationLevel.PIECEWISE
|
||||
|
||||
# Inductor partition case
|
||||
return self.level > CompilationLevel.NO_COMPILATION and self.use_inductor
|
||||
return (
|
||||
self.backend == "inductor" and self.level > CompilationLevel.NO_COMPILATION
|
||||
)
|
||||
|
||||
def custom_op_log_check(self):
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user