[misc] Remove is_torch_equal_or_newer(2.4) cases (#32296)

Signed-off-by: angelayi <yiangela7@gmail.com>
2026-01-13 23:22:07 -08:00
parent 6b176095e3
commit 7933638051
4 changed files with 24 additions and 54 deletions
--- a/vllm/compilation/decorators.py
+++ b/vllm/compilation/decorators.py
@@ -28,7 +28,7 @@ from vllm.config.compilation import DynamicShapesType
 from vllm.logger import init_logger
 from vllm.sequence import IntermediateTensors
 from vllm.utils.import_utils import resolve_obj_by_qualname
-from vllm.utils.torch_utils import is_torch_equal_or_newer, supports_dynamo
+from vllm.utils.torch_utils import is_torch_equal_or_newer
 from .monitor import start_monitoring_torch_compile
@@ -312,7 +312,6 @@ def _support_torch_compile(
        self.do_not_compile = (
            self.compilation_config.mode
            in [CompilationMode.NONE, CompilationMode.STOCK_TORCH_COMPILE]
-            or not supports_dynamo()
            or _should_ignore_torch_compile(self.__class__)
            or not enable_compile
        )
--- a/vllm/distributed/parallel_state.py
+++ b/vllm/distributed/parallel_state.py
@@ -53,7 +53,6 @@ from vllm.utils.network_utils import get_distributed_init_method
 from vllm.utils.system_utils import suppress_stdout
 from vllm.utils.torch_utils import (
    direct_register_custom_op,
-    supports_custom_op,
 )
@@ -246,33 +245,32 @@ def patched_fused_scaled_matmul_reduce_scatter(
    )
-if supports_custom_op():
+direct_register_custom_op(
-    direct_register_custom_op(
+    op_name="all_reduce",
-        op_name="all_reduce",
+    op_func=all_reduce,
-        op_func=all_reduce,
+    fake_impl=all_reduce_fake,
-        fake_impl=all_reduce_fake,
+)
-    )
-    direct_register_custom_op(
+direct_register_custom_op(
-        op_name="reduce_scatter",
+    op_name="reduce_scatter",
-        op_func=reduce_scatter,
+    op_func=reduce_scatter,
-        fake_impl=reduce_scatter_fake,
+    fake_impl=reduce_scatter_fake,
-    )
+)
-    direct_register_custom_op(
+direct_register_custom_op(
-        op_name="all_gather",
+    op_name="all_gather",
-        op_func=all_gather,
+    op_func=all_gather,
-        fake_impl=all_gather_fake,
+    fake_impl=all_gather_fake,
-    )
+)
-    # TODO: Remove this once the pytorch fix
+# TODO: Remove this once the pytorch fix
-    # (https://github.com/pytorch/pytorch/pull/165086) gets released,
+# (https://github.com/pytorch/pytorch/pull/165086) gets released,
-    # in either 2.9.1 or 2.10
+# in either 2.9.1 or 2.10
-    direct_register_custom_op(
+direct_register_custom_op(
-        op_name="patched_fused_scaled_matmul_reduce_scatter",
+    op_name="patched_fused_scaled_matmul_reduce_scatter",
-        op_func=patched_fused_scaled_matmul_reduce_scatter,
+    op_func=patched_fused_scaled_matmul_reduce_scatter,
-        fake_impl=patched_fused_scaled_matmul_reduce_scatter_fake,
+    fake_impl=patched_fused_scaled_matmul_reduce_scatter_fake,
-    )
+)
 class GroupCoordinator:
--- a/vllm/utils/torch_utils.py
+++ b/vllm/utils/torch_utils.py
@@ -704,13 +704,6 @@ def is_torch_equal(target: str) -> bool:
        return Version(importlib.metadata.version("torch")) == Version(target)
-# Using dynamo with vLLM doesn't really work well with PyTorch versions < 2.4.0.
-# In particular, the FakeScalarType is not supported for earlier versions of
-# PyTorch which breaks dynamo for any ops registered using ScalarType.
-def supports_dynamo() -> bool:
-    return is_torch_equal_or_newer("2.4.0")
 # Supports xccl with PyTorch versions >= 2.8.0.dev for XPU platform
 def supports_xccl() -> bool:
    return (
@@ -718,12 +711,6 @@ def supports_xccl() -> bool:
    )
-# Some backends use pytorch version < 2.4.0 which doesn't
-# support `torch.library.custom_op`.
-def supports_custom_op() -> bool:
-    return hasattr(torch.library, "custom_op")
 # create a library to hold the custom op
 vllm_lib = Library("vllm", "FRAGMENT")  # noqa
@@ -752,18 +739,6 @@ def direct_register_custom_op(
    library object. If you want to bind the operator to a different library,
    make sure the library object is alive when the operator is used.
    """
-    if not supports_custom_op():
-        from vllm.platforms import current_platform
-        assert not current_platform.is_cuda_alike(), (
-            "cuda platform needs torch>=2.4 to support custom op, "
-            "chances are you are using an old version of pytorch "
-            "or a custom build of pytorch. It is recommended to "
-            "use vLLM in a fresh new environment and let it install "
-            "the required dependencies."
-        )
-        return
    if mutates_args is None:
        mutates_args = []
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -96,7 +96,6 @@ from vllm.utils.platform_utils import is_pin_memory_available
 from vllm.utils.torch_utils import (
    get_dtype_size,
    kv_cache_dtype_str_to_dtype,
-    supports_dynamo,
 )
 from vllm.v1.attention.backend import (
    AttentionBackend,
@@ -3944,7 +3943,6 @@ class GPUModelRunner(
        if (
            self.vllm_config.compilation_config.mode
            == CompilationMode.STOCK_TORCH_COMPILE
-            and supports_dynamo()
        ):
            backend = self.vllm_config.compilation_config.init_backend(self.vllm_config)
            compilation_counter.stock_torch_compile_count += 1