[misc] Remove is_torch_equal_or_newer(2.4) cases (#32296)
Signed-off-by: angelayi <yiangela7@gmail.com>

@@ -28,7 +28,7 @@ from vllm.config.compilation import DynamicShapesType
 from vllm.logger import init_logger
 from vllm.sequence import IntermediateTensors
 from vllm.utils.import_utils import resolve_obj_by_qualname
-from vllm.utils.torch_utils import is_torch_equal_or_newer, supports_dynamo
+from vllm.utils.torch_utils import is_torch_equal_or_newer
 
 from .monitor import start_monitoring_torch_compile
@@ -312,7 +312,6 @@ def _support_torch_compile(
         self.do_not_compile = (
             self.compilation_config.mode
             in [CompilationMode.NONE, CompilationMode.STOCK_TORCH_COMPILE]
-            or not supports_dynamo()
            or _should_ignore_torch_compile(self.__class__)
            or not enable_compile
        )
@@ -53,7 +53,6 @@ from vllm.utils.network_utils import get_distributed_init_method
 from vllm.utils.system_utils import suppress_stdout
 from vllm.utils.torch_utils import (
     direct_register_custom_op,
-    supports_custom_op,
 )
@@ -246,7 +245,6 @@ def patched_fused_scaled_matmul_reduce_scatter(
     )
 
 
-if supports_custom_op():
     direct_register_custom_op(
         op_name="all_reduce",
         op_func=all_reduce,
@@ -704,13 +704,6 @@ def is_torch_equal(target: str) -> bool:
     return Version(importlib.metadata.version("torch")) == Version(target)
 
 
-# Using dynamo with vLLM doesn't really work well with PyTorch versions < 2.4.0.
-# In particular, the FakeScalarType is not supported for earlier versions of
-# PyTorch which breaks dynamo for any ops registered using ScalarType.
-def supports_dynamo() -> bool:
-    return is_torch_equal_or_newer("2.4.0")
-
-
 # Supports xccl with PyTorch versions >= 2.8.0.dev for XPU platform
 def supports_xccl() -> bool:
     return (
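For reference, the dropped gate is just a version comparison. Below is a minimal sketch of that kind of check (the helper name and `>=` comparison are assumptions mirroring the `is_torch_equal` shown above, not vLLM's exact `is_torch_equal_or_newer`); with torch >= 2.4 as the supported floor it always returns True, which is why `supports_dynamo()` and its call sites can go.

```python
# Hypothetical version gate in the spirit of is_torch_equal_or_newer;
# the helper name here is illustrative, not vLLM's implementation.
import importlib.metadata

from packaging.version import Version


def torch_is_at_least(target: str) -> bool:
    # Compare the installed torch version against a minimum such as "2.4.0".
    return Version(importlib.metadata.version("torch")) >= Version(target)


if __name__ == "__main__":
    # With torch >= 2.4 required, this is always True, so a supports_dynamo()
    # wrapper built on top of it becomes dead weight.
    print(torch_is_at_least("2.4.0"))
```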
@@ -718,12 +711,6 @@ def supports_xccl() -> bool:
     )
 
 
-# Some backends use pytorch version < 2.4.0 which doesn't
-# support `torch.library.custom_op`.
-def supports_custom_op() -> bool:
-    return hasattr(torch.library, "custom_op")
-
-
 # create a library to hold the custom op
 vllm_lib = Library("vllm", "FRAGMENT")  # noqa
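The deleted `hasattr(torch.library, "custom_op")` probe existed because `torch.library.custom_op` only appeared in PyTorch 2.4. With that as the minimum, the API can be assumed present; here is a small self-contained sketch of it (the "demo" namespace and toy op are illustrative, not vLLM code):

```python
# Minimal use of torch.library.custom_op, the API whose presence
# supports_custom_op() used to probe. The "demo" namespace is illustrative.
import torch


@torch.library.custom_op("demo::add_one", mutates_args=())
def add_one(x: torch.Tensor) -> torch.Tensor:
    return x + 1


@add_one.register_fake
def _(x: torch.Tensor) -> torch.Tensor:
    # Fake (meta) implementation so the op traces cleanly under torch.compile.
    return torch.empty_like(x)


if __name__ == "__main__":
    print(add_one(torch.zeros(3)))
```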
@@ -752,18 +739,6 @@ def direct_register_custom_op(
     library object. If you want to bind the operator to a different library,
     make sure the library object is alive when the operator is used.
     """
-    if not supports_custom_op():
-        from vllm.platforms import current_platform
-
-        assert not current_platform.is_cuda_alike(), (
-            "cuda platform needs torch>=2.4 to support custom op, "
-            "chances are you are using an old version of pytorch "
-            "or a custom build of pytorch. It is recommended to "
-            "use vLLM in a fresh new environment and let it install "
-            "the required dependencies."
-        )
-        return
-
     if mutates_args is None:
         mutates_args = []
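The removed branch only handled the case where custom-op support was missing. The docstring point that survives, keeping the `Library` object alive while its operators are in use, can be shown with a rough standalone sketch; the namespace, schema, and dispatch key below are assumptions for illustration, not the internals of `direct_register_custom_op`:

```python
# Rough sketch of registering an op on a torch.library.Library fragment.
# The "demo_frag" namespace, schema, and dispatch key are illustrative.
import torch
from torch.library import Library

# Keep a module-level reference: the Library object must stay alive for as
# long as the operators registered on it are used.
demo_lib = Library("demo_frag", "FRAGMENT")  # noqa


def scale(x: torch.Tensor, factor: float) -> torch.Tensor:
    return x * factor


demo_lib.define("scale(Tensor x, float factor) -> Tensor")
demo_lib.impl("scale", scale, "CompositeExplicitAutograd")

if __name__ == "__main__":
    print(torch.ops.demo_frag.scale(torch.ones(2), 3.0))
```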
@@ -96,7 +96,6 @@ from vllm.utils.platform_utils import is_pin_memory_available
 from vllm.utils.torch_utils import (
     get_dtype_size,
     kv_cache_dtype_str_to_dtype,
-    supports_dynamo,
 )
 from vllm.v1.attention.backend import (
     AttentionBackend,
@@ -3944,7 +3943,6 @@ class GPUModelRunner(
         if (
             self.vllm_config.compilation_config.mode
             == CompilationMode.STOCK_TORCH_COMPILE
-            and supports_dynamo()
         ):
             backend = self.vllm_config.compilation_config.init_backend(self.vllm_config)
             compilation_counter.stock_torch_compile_count += 1
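With `supports_dynamo()` unconditionally true, the STOCK_TORCH_COMPILE branch is gated only by the compilation mode. A minimal sketch of what that path amounts to, assuming a toy backend in place of whatever `init_backend()` really returns:

```python
# Toy illustration of compiling a model with a custom torch.compile backend,
# standing in for the backend produced by compilation_config.init_backend().
import torch


def eager_backend(gm: torch.fx.GraphModule, example_inputs):
    # Trivial backend: just run the captured FX graph eagerly.
    return gm.forward


model = torch.nn.Linear(4, 4)
compiled = torch.compile(model, backend=eager_backend)
print(compiled(torch.randn(2, 4)).shape)
```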