[misc] Remove is_torch_equal_or_newer(2.4) cases (#32296)
Signed-off-by: angelayi <yiangela7@gmail.com>
This commit is contained in:
@@ -28,7 +28,7 @@ from vllm.config.compilation import DynamicShapesType
|
|||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
from vllm.utils.import_utils import resolve_obj_by_qualname
|
from vllm.utils.import_utils import resolve_obj_by_qualname
|
||||||
from vllm.utils.torch_utils import is_torch_equal_or_newer, supports_dynamo
|
from vllm.utils.torch_utils import is_torch_equal_or_newer
|
||||||
|
|
||||||
from .monitor import start_monitoring_torch_compile
|
from .monitor import start_monitoring_torch_compile
|
||||||
|
|
||||||
@@ -312,7 +312,6 @@ def _support_torch_compile(
|
|||||||
self.do_not_compile = (
|
self.do_not_compile = (
|
||||||
self.compilation_config.mode
|
self.compilation_config.mode
|
||||||
in [CompilationMode.NONE, CompilationMode.STOCK_TORCH_COMPILE]
|
in [CompilationMode.NONE, CompilationMode.STOCK_TORCH_COMPILE]
|
||||||
or not supports_dynamo()
|
|
||||||
or _should_ignore_torch_compile(self.__class__)
|
or _should_ignore_torch_compile(self.__class__)
|
||||||
or not enable_compile
|
or not enable_compile
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -53,7 +53,6 @@ from vllm.utils.network_utils import get_distributed_init_method
|
|||||||
from vllm.utils.system_utils import suppress_stdout
|
from vllm.utils.system_utils import suppress_stdout
|
||||||
from vllm.utils.torch_utils import (
|
from vllm.utils.torch_utils import (
|
||||||
direct_register_custom_op,
|
direct_register_custom_op,
|
||||||
supports_custom_op,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -246,33 +245,32 @@ def patched_fused_scaled_matmul_reduce_scatter(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
if supports_custom_op():
|
direct_register_custom_op(
|
||||||
direct_register_custom_op(
|
op_name="all_reduce",
|
||||||
op_name="all_reduce",
|
op_func=all_reduce,
|
||||||
op_func=all_reduce,
|
fake_impl=all_reduce_fake,
|
||||||
fake_impl=all_reduce_fake,
|
)
|
||||||
)
|
|
||||||
|
|
||||||
direct_register_custom_op(
|
direct_register_custom_op(
|
||||||
op_name="reduce_scatter",
|
op_name="reduce_scatter",
|
||||||
op_func=reduce_scatter,
|
op_func=reduce_scatter,
|
||||||
fake_impl=reduce_scatter_fake,
|
fake_impl=reduce_scatter_fake,
|
||||||
)
|
)
|
||||||
|
|
||||||
direct_register_custom_op(
|
direct_register_custom_op(
|
||||||
op_name="all_gather",
|
op_name="all_gather",
|
||||||
op_func=all_gather,
|
op_func=all_gather,
|
||||||
fake_impl=all_gather_fake,
|
fake_impl=all_gather_fake,
|
||||||
)
|
)
|
||||||
|
|
||||||
# TODO: Remove this once the pytorch fix
|
# TODO: Remove this once the pytorch fix
|
||||||
# (https://github.com/pytorch/pytorch/pull/165086) gets released,
|
# (https://github.com/pytorch/pytorch/pull/165086) gets released,
|
||||||
# in either 2.9.1 or 2.10
|
# in either 2.9.1 or 2.10
|
||||||
direct_register_custom_op(
|
direct_register_custom_op(
|
||||||
op_name="patched_fused_scaled_matmul_reduce_scatter",
|
op_name="patched_fused_scaled_matmul_reduce_scatter",
|
||||||
op_func=patched_fused_scaled_matmul_reduce_scatter,
|
op_func=patched_fused_scaled_matmul_reduce_scatter,
|
||||||
fake_impl=patched_fused_scaled_matmul_reduce_scatter_fake,
|
fake_impl=patched_fused_scaled_matmul_reduce_scatter_fake,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class GroupCoordinator:
|
class GroupCoordinator:
|
||||||
|
|||||||
@@ -704,13 +704,6 @@ def is_torch_equal(target: str) -> bool:
|
|||||||
return Version(importlib.metadata.version("torch")) == Version(target)
|
return Version(importlib.metadata.version("torch")) == Version(target)
|
||||||
|
|
||||||
|
|
||||||
# Using dynamo with vLLM doesn't really work well with PyTorch versions < 2.4.0.
|
|
||||||
# In particular, the FakeScalarType is not supported for earlier versions of
|
|
||||||
# PyTorch which breaks dynamo for any ops registered using ScalarType.
|
|
||||||
def supports_dynamo() -> bool:
|
|
||||||
return is_torch_equal_or_newer("2.4.0")
|
|
||||||
|
|
||||||
|
|
||||||
# Supports xccl with PyTorch versions >= 2.8.0.dev for XPU platform
|
# Supports xccl with PyTorch versions >= 2.8.0.dev for XPU platform
|
||||||
def supports_xccl() -> bool:
|
def supports_xccl() -> bool:
|
||||||
return (
|
return (
|
||||||
@@ -718,12 +711,6 @@ def supports_xccl() -> bool:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# Some backends use pytorch version < 2.4.0 which doesn't
|
|
||||||
# support `torch.library.custom_op`.
|
|
||||||
def supports_custom_op() -> bool:
|
|
||||||
return hasattr(torch.library, "custom_op")
|
|
||||||
|
|
||||||
|
|
||||||
# create a library to hold the custom op
|
# create a library to hold the custom op
|
||||||
vllm_lib = Library("vllm", "FRAGMENT") # noqa
|
vllm_lib = Library("vllm", "FRAGMENT") # noqa
|
||||||
|
|
||||||
@@ -752,18 +739,6 @@ def direct_register_custom_op(
|
|||||||
library object. If you want to bind the operator to a different library,
|
library object. If you want to bind the operator to a different library,
|
||||||
make sure the library object is alive when the operator is used.
|
make sure the library object is alive when the operator is used.
|
||||||
"""
|
"""
|
||||||
if not supports_custom_op():
|
|
||||||
from vllm.platforms import current_platform
|
|
||||||
|
|
||||||
assert not current_platform.is_cuda_alike(), (
|
|
||||||
"cuda platform needs torch>=2.4 to support custom op, "
|
|
||||||
"chances are you are using an old version of pytorch "
|
|
||||||
"or a custom build of pytorch. It is recommended to "
|
|
||||||
"use vLLM in a fresh new environment and let it install "
|
|
||||||
"the required dependencies."
|
|
||||||
)
|
|
||||||
return
|
|
||||||
|
|
||||||
if mutates_args is None:
|
if mutates_args is None:
|
||||||
mutates_args = []
|
mutates_args = []
|
||||||
|
|
||||||
|
|||||||
@@ -96,7 +96,6 @@ from vllm.utils.platform_utils import is_pin_memory_available
|
|||||||
from vllm.utils.torch_utils import (
|
from vllm.utils.torch_utils import (
|
||||||
get_dtype_size,
|
get_dtype_size,
|
||||||
kv_cache_dtype_str_to_dtype,
|
kv_cache_dtype_str_to_dtype,
|
||||||
supports_dynamo,
|
|
||||||
)
|
)
|
||||||
from vllm.v1.attention.backend import (
|
from vllm.v1.attention.backend import (
|
||||||
AttentionBackend,
|
AttentionBackend,
|
||||||
@@ -3944,7 +3943,6 @@ class GPUModelRunner(
|
|||||||
if (
|
if (
|
||||||
self.vllm_config.compilation_config.mode
|
self.vllm_config.compilation_config.mode
|
||||||
== CompilationMode.STOCK_TORCH_COMPILE
|
== CompilationMode.STOCK_TORCH_COMPILE
|
||||||
and supports_dynamo()
|
|
||||||
):
|
):
|
||||||
backend = self.vllm_config.compilation_config.init_backend(self.vllm_config)
|
backend = self.vllm_config.compilation_config.init_backend(self.vllm_config)
|
||||||
compilation_counter.stock_torch_compile_count += 1
|
compilation_counter.stock_torch_compile_count += 1
|
||||||
|
|||||||
Reference in New Issue
Block a user