[ez] Remove checks for torch version <= 2.8 (#33209)
Signed-off-by: angelayi <yiangela7@gmail.com>
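Context for the diff below: once PyTorch 2.8 is the assumed minimum, version gates such as `is_torch_equal_or_newer("2.6")` and `is_torch_equal_or_newer("2.8.0.dev")` always evaluate true, so the guarded branches can be inlined and the pre-2.8 fallbacks deleted. A minimal sketch of that assumption (editor's illustration, not part of the diff):

```python
# Minimal sketch, assuming a torch >= 2.8 install (the floor this commit
# relies on): the version gates deleted below are tautologies.
from vllm.utils.torch_utils import is_torch_equal_or_newer

assert is_torch_equal_or_newer("2.6")        # trivially true on torch >= 2.8
assert is_torch_equal_or_newer("2.8.0.dev")  # trivially true on torch >= 2.8
```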
```diff
@@ -33,7 +33,6 @@ from vllm.logger import init_logger
 from vllm.logging_utils import lazy
 from vllm.platforms import current_platform
 from vllm.utils.import_utils import resolve_obj_by_qualname
-from vllm.utils.torch_utils import is_torch_equal_or_newer
 
 from .compiler_interface import (
     CompilerInterface,
```
```diff
@@ -94,10 +93,8 @@ def make_compiler(compilation_config: CompilationConfig) -> CompilerInterface:
     if compilation_config.backend == "inductor":
         # Use standalone compile only if requested, version is new enough,
         # and the symbol actually exists in this PyTorch build.
-        if (
-            envs.VLLM_USE_STANDALONE_COMPILE
-            and is_torch_equal_or_newer("2.8.0.dev")
-            and hasattr(torch._inductor, "standalone_compile")
+        if envs.VLLM_USE_STANDALONE_COMPILE and hasattr(
+            torch._inductor, "standalone_compile"
         ):
             logger.debug("Using InductorStandaloneAdaptor")
             return InductorStandaloneAdaptor(
```
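The surviving guard is pure feature detection: probe for the symbol instead of comparing version strings. A hedged sketch of the pattern (the `use_standalone_compile` helper and its `requested` parameter are illustrative, not vLLM API):

```python
# Illustrative helper (not vLLM API): gate on the presence of a symbol in
# the installed PyTorch build rather than on a version comparison.
# `requested` stands in for the VLLM_USE_STANDALONE_COMPILE flag.
import torch
import torch._inductor


def use_standalone_compile(requested: bool) -> bool:
    return requested and hasattr(torch._inductor, "standalone_compile")
```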
```diff
@@ -501,20 +501,19 @@ class InductorAdaptor(CompilerInterface):
         # get hit.
         # TODO(zou3519): we're going to replace this all with
         # standalone_compile sometime.
-        if is_torch_equal_or_newer("2.6"):
-            stack.enter_context(
-                torch._inductor.config.patch(fx_graph_remote_cache=False)
-            )
-            # InductorAdaptor (unfortunately) requires AOTAutogradCache
-            # to be turned off to run. It will fail to acquire the hash_str
-            # and error if not.
-            # StandaloneInductorAdaptor (PyTorch 2.8+) fixes this problem.
-            stack.enter_context(
-                torch._functorch.config.patch(enable_autograd_cache=False)
-            )
-            stack.enter_context(
-                torch._functorch.config.patch(enable_remote_autograd_cache=False)
-            )
+        stack.enter_context(
+            torch._inductor.config.patch(fx_graph_remote_cache=False)
+        )
+        # InductorAdaptor (unfortunately) requires AOTAutogradCache
+        # to be turned off to run. It will fail to acquire the hash_str
+        # and error if not.
+        # StandaloneInductorAdaptor (PyTorch 2.8+) fixes this problem.
+        stack.enter_context(
+            torch._functorch.config.patch(enable_autograd_cache=False)
+        )
+        stack.enter_context(
+            torch._functorch.config.patch(enable_remote_autograd_cache=False)
+        )
 
         compiled_graph = compile_fx(
             graph,
```
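With the `is_torch_equal_or_newer("2.6")` gate gone, the cache-disabling patches now apply unconditionally. A small standalone sketch of the `ExitStack` pattern used above:

```python
# Each torch config .patch(...) call returns a context manager; entering
# them on one ExitStack keeps all patches active together and reverts them
# when the stack unwinds.
import contextlib

import torch._functorch.config
import torch._inductor.config

with contextlib.ExitStack() as stack:
    stack.enter_context(torch._inductor.config.patch(fx_graph_remote_cache=False))
    stack.enter_context(torch._functorch.config.patch(enable_autograd_cache=False))
    stack.enter_context(
        torch._functorch.config.patch(enable_remote_autograd_cache=False)
    )
    # ... run Inductor compilation here; the caches are re-enabled on exit ...
```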
```diff
@@ -7,12 +7,11 @@ import inspect
 import os
 import sys
 from collections.abc import Callable, Generator
-from typing import TYPE_CHECKING, Any, Literal, TypeVar, overload
+from typing import TYPE_CHECKING, Any, TypeVar, overload
 from unittest.mock import patch
 
 import torch
 import torch.nn as nn
-from packaging import version
 from torch._dynamo.symbolic_convert import InliningInstructionTranslator
 
 import vllm.envs as envs
```
```diff
@@ -540,7 +539,6 @@ def _support_torch_compile(
             torch._dynamo.config.patch(**dynamo_config_patches),
             maybe_use_cudagraph_partition_wrapper(self.vllm_config),
             torch.fx.experimental._config.patch(**fx_config_patches),
-            _torch27_patch_tensor_subclasses(),
             torch._inductor.config.patch(**inductor_config_patches),
         ):
             use_aot_compile = envs.VLLM_USE_AOT_COMPILE
```
```diff
@@ -647,42 +645,3 @@ def maybe_use_cudagraph_partition_wrapper(
             and compilation_config.use_inductor_graph_partition
         ):
             torch._inductor.utils.set_customized_partition_wrappers(None)
-
-
-@contextlib.contextmanager
-def _torch27_patch_tensor_subclasses() -> Generator[None, None, None]:
-    """
-    Add support for using tensor subclasses (ie `BasevLLMParameter`, ect) when
-    using torch 2.7.0. This enables using weight_loader_v2 and the use of
-    `BasevLLMParameters` without having to replace them with regular tensors
-    before `torch.compile`-time.
-    """
-    from vllm.model_executor.parameter import (
-        BasevLLMParameter,
-        ModelWeightParameter,
-        RowvLLMParameter,
-        _ColumnvLLMParameter,
-    )
-
-    def return_false(*args: Any, **kwargs: Any) -> Literal[False]:
-        return False
-
-    if not (version.parse("2.7") <= version.parse(torch.__version__) < version.parse("2.8")):
-        yield
-        return
-
-    with (
-        torch._dynamo.config.patch(
-            "traceable_tensor_subclasses",
-            [
-                BasevLLMParameter,
-                ModelWeightParameter,
-                _ColumnvLLMParameter,
-                RowvLLMParameter,
-            ],
-        ),
-        patch(
-            "torch._dynamo.variables.torch.can_dispatch_torch_function", return_false
-        ),
-    ):
-        yield
```
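The deleted helper follows a common shape: a `@contextlib.contextmanager` that is a no-op outside a version window and yields under workaround patches inside it. A reduced, runnable sketch of that pattern (`maybe_patch` is illustrative, and `math.pi` is a stand-in patch target, since the real `traceable_tensor_subclasses` knob no longer exists on torch >= 2.8):

```python
# Reduced sketch of the deleted pattern: yield immediately outside the
# [2.7, 2.8) window, otherwise yield under workaround patches.
import contextlib
from collections.abc import Generator
from unittest.mock import patch

from packaging import version


@contextlib.contextmanager
def maybe_patch(torch_version: str) -> Generator[None, None, None]:
    """No-op outside the [2.7, 2.8) window; patch inside it."""
    in_window = (
        version.parse("2.7") <= version.parse(torch_version) < version.parse("2.8")
    )
    if not in_window:
        yield  # outside the window: nothing to patch
        return
    with patch("math.pi", 3.0):  # stand-in for the real dynamo config patches
        yield
```

Callers simply write `with maybe_patch(torch.__version__): ...` and never branch on the version themselves, which is why the PR can delete the helper at its single call site.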
```diff
@@ -16,18 +16,10 @@ import torch
 from torch import fx
 from torch._subclasses.fake_tensor import FakeTensorMode, unset_fake_temporarily
-
-from vllm.utils.torch_utils import is_torch_equal_or_newer
 
 if TYPE_CHECKING:
     from vllm.config.utils import Range
 
-if is_torch_equal_or_newer("2.6"):
-    from torch._inductor.custom_graph_pass import CustomGraphPass
-else:
-    # CustomGraphPass is not present in 2.5 or lower, import our version
-    from .torch25_custom_graph_pass import (
-        Torch25CustomGraphPass as CustomGraphPass,
-    )
+from torch._inductor.custom_graph_pass import CustomGraphPass
 
 _pass_context = None
 P = ParamSpec("P")
```
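Since `CustomGraphPass` has shipped with Inductor since torch 2.6, the `torch25_custom_graph_pass` shim can be dropped and the class imported directly. A hedged sketch of the interface that import provides (`NoopGraphPass` is illustrative):

```python
# CustomGraphPass is Inductor's hook for user-defined FX graph passes:
# __call__ rewrites the graph in place, and uuid() feeds the compile cache key.
import torch
from torch._inductor.custom_graph_pass import CustomGraphPass


class NoopGraphPass(CustomGraphPass):
    def __call__(self, graph: torch.fx.Graph) -> None:
        # A real pass would inspect and rewrite graph.nodes here.
        pass

    def uuid(self) -> bytes:
        # Any stable identifier; changing it invalidates cached artifacts.
        return b"noop-graph-pass-v1"
```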