[misc] Remove is_torch_equal_or_newer(2.4) cases (#32296)

Signed-off-by: angelayi <yiangela7@gmail.com>
This commit is contained in:
Angela Yi
2026-01-13 23:22:07 -08:00
committed by GitHub
parent 6b176095e3
commit 7933638051
4 changed files with 24 additions and 54 deletions

View File

@@ -28,7 +28,7 @@ from vllm.config.compilation import DynamicShapesType
 from vllm.logger import init_logger
 from vllm.sequence import IntermediateTensors
 from vllm.utils.import_utils import resolve_obj_by_qualname
-from vllm.utils.torch_utils import is_torch_equal_or_newer, supports_dynamo
+from vllm.utils.torch_utils import is_torch_equal_or_newer

 from .monitor import start_monitoring_torch_compile
@@ -312,7 +312,6 @@ def _support_torch_compile(
         self.do_not_compile = (
             self.compilation_config.mode
             in [CompilationMode.NONE, CompilationMode.STOCK_TORCH_COMPILE]
-            or not supports_dynamo()
             or _should_ignore_torch_compile(self.__class__)
             or not enable_compile
         )

View File

@@ -53,7 +53,6 @@ from vllm.utils.network_utils import get_distributed_init_method
 from vllm.utils.system_utils import suppress_stdout
 from vllm.utils.torch_utils import (
     direct_register_custom_op,
-    supports_custom_op,
 )
@@ -246,33 +245,32 @@ def patched_fused_scaled_matmul_reduce_scatter(
 )

-if supports_custom_op():
-    direct_register_custom_op(
-        op_name="all_reduce",
-        op_func=all_reduce,
-        fake_impl=all_reduce_fake,
-    )
-    direct_register_custom_op(
-        op_name="reduce_scatter",
-        op_func=reduce_scatter,
-        fake_impl=reduce_scatter_fake,
-    )
-    direct_register_custom_op(
-        op_name="all_gather",
-        op_func=all_gather,
-        fake_impl=all_gather_fake,
-    )
-    # TODO: Remove this once the pytorch fix
-    # (https://github.com/pytorch/pytorch/pull/165086) gets released,
-    # in either 2.9.1 or 2.10
-    direct_register_custom_op(
-        op_name="patched_fused_scaled_matmul_reduce_scatter",
-        op_func=patched_fused_scaled_matmul_reduce_scatter,
-        fake_impl=patched_fused_scaled_matmul_reduce_scatter_fake,
-    )
+direct_register_custom_op(
+    op_name="all_reduce",
+    op_func=all_reduce,
+    fake_impl=all_reduce_fake,
+)
+direct_register_custom_op(
+    op_name="reduce_scatter",
+    op_func=reduce_scatter,
+    fake_impl=reduce_scatter_fake,
+)
+direct_register_custom_op(
+    op_name="all_gather",
+    op_func=all_gather,
+    fake_impl=all_gather_fake,
+)
+# TODO: Remove this once the pytorch fix
+# (https://github.com/pytorch/pytorch/pull/165086) gets released,
+# in either 2.9.1 or 2.10
+direct_register_custom_op(
+    op_name="patched_fused_scaled_matmul_reduce_scatter",
+    op_func=patched_fused_scaled_matmul_reduce_scatter,
+    fake_impl=patched_fused_scaled_matmul_reduce_scatter_fake,
+)

 class GroupCoordinator:

View File

@@ -704,13 +704,6 @@ def is_torch_equal(target: str) -> bool:
     return Version(importlib.metadata.version("torch")) == Version(target)

-# Using dynamo with vLLM doesn't really work well with PyTorch versions < 2.4.0.
-# In particular, the FakeScalarType is not supported for earlier versions of
-# PyTorch which breaks dynamo for any ops registered using ScalarType.
-def supports_dynamo() -> bool:
-    return is_torch_equal_or_newer("2.4.0")
-
 # Supports xccl with PyTorch versions >= 2.8.0.dev for XPU platform
 def supports_xccl() -> bool:
     return (
@@ -718,12 +711,6 @@ def supports_xccl() -> bool:
     )

-# Some backends use pytorch version < 2.4.0 which doesn't
-# support `torch.library.custom_op`.
-def supports_custom_op() -> bool:
-    return hasattr(torch.library, "custom_op")
-
 # create a library to hold the custom op
 vllm_lib = Library("vllm", "FRAGMENT")  # noqa
@@ -752,18 +739,6 @@ def direct_register_custom_op(
     library object. If you want to bind the operator to a different library,
     make sure the library object is alive when the operator is used.
     """
-    if not supports_custom_op():
-        from vllm.platforms import current_platform
-
-        assert not current_platform.is_cuda_alike(), (
-            "cuda platform needs torch>=2.4 to support custom op, "
-            "chances are you are using an old version of pytorch "
-            "or a custom build of pytorch. It is recommended to "
-            "use vLLM in a fresh new environment and let it install "
-            "the required dependencies."
-        )
-        return
     if mutates_args is None:
         mutates_args = []

View File

@@ -96,7 +96,6 @@ from vllm.utils.platform_utils import is_pin_memory_available
 from vllm.utils.torch_utils import (
     get_dtype_size,
     kv_cache_dtype_str_to_dtype,
-    supports_dynamo,
 )
 from vllm.v1.attention.backend import (
     AttentionBackend,
@@ -3944,7 +3943,6 @@ class GPUModelRunner(
         if (
             self.vllm_config.compilation_config.mode
             == CompilationMode.STOCK_TORCH_COMPILE
-            and supports_dynamo()
         ):
             backend = self.vllm_config.compilation_config.init_backend(self.vllm_config)
             compilation_counter.stock_torch_compile_count += 1