[ez] Remove checks for torch version <= 2.8 (#33209)

Signed-off-by: angelayi <yiangela7@gmail.com>
This commit is contained in:
Angela Yi
2026-01-28 13:03:56 -08:00
committed by GitHub
parent 59bcc5b6f2
commit 4197168ea5
11 changed files with 30 additions and 139 deletions

View File

@@ -33,7 +33,6 @@ from vllm.logger import init_logger
from vllm.logging_utils import lazy
from vllm.platforms import current_platform
from vllm.utils.import_utils import resolve_obj_by_qualname
from vllm.utils.torch_utils import is_torch_equal_or_newer
from .compiler_interface import (
CompilerInterface,
@@ -94,10 +93,8 @@ def make_compiler(compilation_config: CompilationConfig) -> CompilerInterface:
if compilation_config.backend == "inductor":
# Use standalone compile only if requested, version is new enough,
# and the symbol actually exists in this PyTorch build.
if (
envs.VLLM_USE_STANDALONE_COMPILE
and is_torch_equal_or_newer("2.8.0.dev")
and hasattr(torch._inductor, "standalone_compile")
if envs.VLLM_USE_STANDALONE_COMPILE and hasattr(
torch._inductor, "standalone_compile"
):
logger.debug("Using InductorStandaloneAdaptor")
return InductorStandaloneAdaptor(

View File

@@ -501,20 +501,19 @@ class InductorAdaptor(CompilerInterface):
# get hit.
# TODO(zou3519): we're going to replace this all with
# standalone_compile sometime.
if is_torch_equal_or_newer("2.6"):
stack.enter_context(
torch._inductor.config.patch(fx_graph_remote_cache=False)
)
# InductorAdaptor (unfortunately) requires AOTAutogradCache
# to be turned off to run. It will fail to acquire the hash_str
# and error if not.
# StandaloneInductorAdaptor (PyTorch 2.8+) fixes this problem.
stack.enter_context(
torch._functorch.config.patch(enable_autograd_cache=False)
)
stack.enter_context(
torch._functorch.config.patch(enable_remote_autograd_cache=False)
)
stack.enter_context(
torch._inductor.config.patch(fx_graph_remote_cache=False)
)
# InductorAdaptor (unfortunately) requires AOTAutogradCache
# to be turned off to run. It will fail to acquire the hash_str
# and error if not.
# StandaloneInductorAdaptor (PyTorch 2.8+) fixes this problem.
stack.enter_context(
torch._functorch.config.patch(enable_autograd_cache=False)
)
stack.enter_context(
torch._functorch.config.patch(enable_remote_autograd_cache=False)
)
compiled_graph = compile_fx(
graph,

View File

@@ -7,12 +7,11 @@ import inspect
import os
import sys
from collections.abc import Callable, Generator
from typing import TYPE_CHECKING, Any, Literal, TypeVar, overload
from typing import TYPE_CHECKING, Any, TypeVar, overload
from unittest.mock import patch
import torch
import torch.nn as nn
from packaging import version
from torch._dynamo.symbolic_convert import InliningInstructionTranslator
import vllm.envs as envs
@@ -540,7 +539,6 @@ def _support_torch_compile(
torch._dynamo.config.patch(**dynamo_config_patches),
maybe_use_cudagraph_partition_wrapper(self.vllm_config),
torch.fx.experimental._config.patch(**fx_config_patches),
_torch27_patch_tensor_subclasses(),
torch._inductor.config.patch(**inductor_config_patches),
):
use_aot_compile = envs.VLLM_USE_AOT_COMPILE
@@ -647,42 +645,3 @@ def maybe_use_cudagraph_partition_wrapper(
and compilation_config.use_inductor_graph_partition
):
torch._inductor.utils.set_customized_partition_wrappers(None)
@contextlib.contextmanager
def _torch27_patch_tensor_subclasses() -> Generator[None, None, None]:
    """Context manager adding `torch.compile` support for vLLM tensor subclasses.

    Add support for using tensor subclasses (e.g. `BasevLLMParameter`, etc.)
    when using torch 2.7.0. This enables using weight_loader_v2 and the use of
    `BasevLLMParameter`s without having to replace them with regular tensors
    before `torch.compile`-time.

    Yields:
        None. While the context is active, the parameter subclasses below are
        registered as Dynamo-traceable tensor subclasses and torch-function
        dispatch is stubbed out.
    """
    # Imported lazily (function scope) — presumably to avoid a circular
    # import between this module and the parameter definitions; verify
    # against the original file before hoisting.
    from vllm.model_executor.parameter import (
        BasevLLMParameter,
        ModelWeightParameter,
        RowvLLMParameter,
        _ColumnvLLMParameter,
    )

    def return_false(*args: Any, **kwargs: Any) -> Literal[False]:
        # Replacement for `can_dispatch_torch_function`: report that the
        # subclasses never need torch-function dispatch, so Dynamo can
        # trace straight through them.
        return False

    # NOTE(review): as written, this guard yields *without* applying the
    # patches precisely when torch is within [2.7, 2.8) — the opposite of
    # what the docstring describes (the patches exist *for* 2.7.x). The
    # scraped diff may have dropped a `not (...)` here — TODO confirm
    # against the repository before relying on this condition.
    if version.parse("2.7") <= version.parse(torch.__version__) < version.parse("2.8"):
        yield
        return

    with (
        # Tell Dynamo these parameter subclasses are safe to trace.
        torch._dynamo.config.patch(
            "traceable_tensor_subclasses",
            [
                BasevLLMParameter,
                ModelWeightParameter,
                _ColumnvLLMParameter,
                RowvLLMParameter,
            ],
        ),
        # Disable torch-function dispatch checks for the duration of the
        # context (see `return_false` above).
        patch(
            "torch._dynamo.variables.torch.can_dispatch_torch_function", return_false
        ),
    ):
        yield

View File

@@ -16,18 +16,10 @@ import torch
from torch import fx
from torch._subclasses.fake_tensor import FakeTensorMode, unset_fake_temporarily
from vllm.utils.torch_utils import is_torch_equal_or_newer
if TYPE_CHECKING:
from vllm.config.utils import Range
if is_torch_equal_or_newer("2.6"):
from torch._inductor.custom_graph_pass import CustomGraphPass
else:
# CustomGraphPass is not present in 2.5 or lower, import our version
from .torch25_custom_graph_pass import (
Torch25CustomGraphPass as CustomGraphPass,
)
from torch._inductor.custom_graph_pass import CustomGraphPass
_pass_context = None
P = ParamSpec("P")