[ez] Remove checks for torch version <= 2.8 (#33209)
Signed-off-by: angelayi <yiangela7@gmail.com>
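Context for the diff below: once PyTorch 2.8 is the assumed minimum, version gates such as `is_torch_equal_or_newer("2.6")` and `is_torch_equal_or_newer("2.8.0.dev")` always evaluate true, so the guarded branches can be inlined and the pre-2.8 fallbacks deleted. A minimal sketch of that assumption (editor's illustration, not part of the diff):

```python
# Minimal sketch, assuming a torch >= 2.8 install (the floor this commit
# relies on): the version gates deleted below are tautologies.
from vllm.utils.torch_utils import is_torch_equal_or_newer

assert is_torch_equal_or_newer("2.6")        # trivially true on torch >= 2.8
assert is_torch_equal_or_newer("2.8.0.dev")  # trivially true on torch >= 2.8
```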
```diff
@@ -33,7 +33,6 @@ from vllm.logger import init_logger
 from vllm.logging_utils import lazy
 from vllm.platforms import current_platform
 from vllm.utils.import_utils import resolve_obj_by_qualname
-from vllm.utils.torch_utils import is_torch_equal_or_newer
 
 from .compiler_interface import (
     CompilerInterface,
```
```diff
@@ -94,10 +93,8 @@ def make_compiler(compilation_config: CompilationConfig) -> CompilerInterface:
     if compilation_config.backend == "inductor":
         # Use standalone compile only if requested, version is new enough,
         # and the symbol actually exists in this PyTorch build.
-        if (
-            envs.VLLM_USE_STANDALONE_COMPILE
-            and is_torch_equal_or_newer("2.8.0.dev")
-            and hasattr(torch._inductor, "standalone_compile")
+        if envs.VLLM_USE_STANDALONE_COMPILE and hasattr(
+            torch._inductor, "standalone_compile"
         ):
             logger.debug("Using InductorStandaloneAdaptor")
             return InductorStandaloneAdaptor(
```
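The surviving guard is pure feature detection: probe for the symbol instead of comparing version strings. A hedged sketch of the pattern (the `use_standalone_compile` helper and its `requested` parameter are illustrative, not vLLM API):

```python
# Illustrative helper (not vLLM API): gate on the presence of a symbol in
# the installed PyTorch build rather than on a version comparison.
# `requested` stands in for the VLLM_USE_STANDALONE_COMPILE flag.
import torch
import torch._inductor


def use_standalone_compile(requested: bool) -> bool:
    return requested and hasattr(torch._inductor, "standalone_compile")
```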
```diff
@@ -501,20 +501,19 @@ class InductorAdaptor(CompilerInterface):
         # get hit.
         # TODO(zou3519): we're going to replace this all with
         # standalone_compile sometime.
-        if is_torch_equal_or_newer("2.6"):
-            stack.enter_context(
-                torch._inductor.config.patch(fx_graph_remote_cache=False)
-            )
-            # InductorAdaptor (unfortunately) requires AOTAutogradCache
-            # to be turned off to run. It will fail to acquire the hash_str
-            # and error if not.
-            # StandaloneInductorAdaptor (PyTorch 2.8+) fixes this problem.
-            stack.enter_context(
-                torch._functorch.config.patch(enable_autograd_cache=False)
-            )
-            stack.enter_context(
-                torch._functorch.config.patch(enable_remote_autograd_cache=False)
-            )
+        stack.enter_context(
+            torch._inductor.config.patch(fx_graph_remote_cache=False)
+        )
+        # InductorAdaptor (unfortunately) requires AOTAutogradCache
+        # to be turned off to run. It will fail to acquire the hash_str
+        # and error if not.
+        # StandaloneInductorAdaptor (PyTorch 2.8+) fixes this problem.
+        stack.enter_context(
+            torch._functorch.config.patch(enable_autograd_cache=False)
+        )
+        stack.enter_context(
+            torch._functorch.config.patch(enable_remote_autograd_cache=False)
+        )
 
         compiled_graph = compile_fx(
             graph,
```
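With the `is_torch_equal_or_newer("2.6")` gate gone, the cache-disabling patches now apply unconditionally. A small standalone sketch of the `ExitStack` pattern used above:

```python
# Each torch config .patch(...) call returns a context manager; entering
# them on one ExitStack keeps all patches active together and reverts them
# when the stack unwinds.
import contextlib

import torch._functorch.config
import torch._inductor.config

with contextlib.ExitStack() as stack:
    stack.enter_context(torch._inductor.config.patch(fx_graph_remote_cache=False))
    stack.enter_context(torch._functorch.config.patch(enable_autograd_cache=False))
    stack.enter_context(
        torch._functorch.config.patch(enable_remote_autograd_cache=False)
    )
    # ... run Inductor compilation here; the caches are re-enabled on exit ...
```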
```diff
@@ -7,12 +7,11 @@ import inspect
 import os
 import sys
 from collections.abc import Callable, Generator
-from typing import TYPE_CHECKING, Any, Literal, TypeVar, overload
+from typing import TYPE_CHECKING, Any, TypeVar, overload
 from unittest.mock import patch
 
 import torch
 import torch.nn as nn
-from packaging import version
 from torch._dynamo.symbolic_convert import InliningInstructionTranslator
 
 import vllm.envs as envs
```
```diff
@@ -540,7 +539,6 @@ def _support_torch_compile(
             torch._dynamo.config.patch(**dynamo_config_patches),
             maybe_use_cudagraph_partition_wrapper(self.vllm_config),
             torch.fx.experimental._config.patch(**fx_config_patches),
-            _torch27_patch_tensor_subclasses(),
             torch._inductor.config.patch(**inductor_config_patches),
         ):
             use_aot_compile = envs.VLLM_USE_AOT_COMPILE
```
```diff
@@ -647,42 +645,3 @@ def maybe_use_cudagraph_partition_wrapper(
             and compilation_config.use_inductor_graph_partition
         ):
             torch._inductor.utils.set_customized_partition_wrappers(None)
-
-
-@contextlib.contextmanager
-def _torch27_patch_tensor_subclasses() -> Generator[None, None, None]:
-    """
-    Add support for using tensor subclasses (ie `BasevLLMParameter`, ect) when
-    using torch 2.7.0. This enables using weight_loader_v2 and the use of
-    `BasevLLMParameters` without having to replace them with regular tensors
-    before `torch.compile`-time.
-    """
-    from vllm.model_executor.parameter import (
-        BasevLLMParameter,
-        ModelWeightParameter,
-        RowvLLMParameter,
-        _ColumnvLLMParameter,
-    )
-
-    def return_false(*args: Any, **kwargs: Any) -> Literal[False]:
-        return False
-
-    if not (version.parse("2.7") <= version.parse(torch.__version__) < version.parse("2.8")):
-        yield
-        return
-
-    with (
-        torch._dynamo.config.patch(
-            "traceable_tensor_subclasses",
-            [
-                BasevLLMParameter,
-                ModelWeightParameter,
-                _ColumnvLLMParameter,
-                RowvLLMParameter,
-            ],
-        ),
-        patch(
-            "torch._dynamo.variables.torch.can_dispatch_torch_function", return_false
-        ),
-    ):
-        yield
```
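The deleted helper follows a common shape: a `@contextlib.contextmanager` that is a no-op outside a version window and yields under workaround patches inside it. A reduced, runnable sketch of that pattern (`maybe_patch` is illustrative, and `math.pi` is a stand-in patch target, since the real `traceable_tensor_subclasses` knob no longer exists on torch >= 2.8):

```python
# Reduced sketch of the deleted pattern: yield immediately outside the
# [2.7, 2.8) window, otherwise yield under workaround patches.
import contextlib
from collections.abc import Generator
from unittest.mock import patch

from packaging import version


@contextlib.contextmanager
def maybe_patch(torch_version: str) -> Generator[None, None, None]:
    """No-op outside the [2.7, 2.8) window; patch inside it."""
    in_window = (
        version.parse("2.7") <= version.parse(torch_version) < version.parse("2.8")
    )
    if not in_window:
        yield  # outside the window: nothing to patch
        return
    with patch("math.pi", 3.0):  # stand-in for the real dynamo config patches
        yield
```

Callers simply write `with maybe_patch(torch.__version__): ...` and never branch on the version themselves, which is why the PR can delete the helper at its single call site.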
```diff
@@ -16,18 +16,10 @@ import torch
 from torch import fx
 from torch._subclasses.fake_tensor import FakeTensorMode, unset_fake_temporarily
-
-from vllm.utils.torch_utils import is_torch_equal_or_newer
 
 if TYPE_CHECKING:
     from vllm.config.utils import Range
 
-if is_torch_equal_or_newer("2.6"):
-    from torch._inductor.custom_graph_pass import CustomGraphPass
-else:
-    # CustomGraphPass is not present in 2.5 or lower, import our version
-    from .torch25_custom_graph_pass import (
-        Torch25CustomGraphPass as CustomGraphPass,
-    )
+from torch._inductor.custom_graph_pass import CustomGraphPass
 
 _pass_context = None
 P = ParamSpec("P")
```
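Since `CustomGraphPass` has shipped with Inductor since torch 2.6, the `torch25_custom_graph_pass` shim can be dropped and the class imported directly. A hedged sketch of the interface that import provides (`NoopGraphPass` is illustrative):

```python
# CustomGraphPass is Inductor's hook for user-defined FX graph passes:
# __call__ rewrites the graph in place, and uuid() feeds the compile cache key.
import torch
from torch._inductor.custom_graph_pass import CustomGraphPass


class NoopGraphPass(CustomGraphPass):
    def __call__(self, graph: torch.fx.Graph) -> None:
        # A real pass would inspect and rewrite graph.nodes here.
        pass

    def uuid(self) -> bytes:
        # Any stable identifier; changing it invalidates cached artifacts.
        return b"noop-graph-pass-v1"
```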