[ez] Remove checks for torch version <= 2.8 (#33209)
Signed-off-by: angelayi <yiangela7@gmail.com>
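With torch >= 2.8 guaranteed, the is_torch_equal_or_newer("2.7.0") and is_torch_equal_or_newer("2.8.0") gates in the hunks below always evaluate true, so the fallback branches and the helper imports become dead code. A minimal sketch of the gate being deleted, using a simplified stand-in for vllm.utils.torch_utils.is_torch_equal_or_newer (the real helper is more careful about dev and local builds):

# Simplified stand-in, for illustration only.
from packaging.version import Version

import torch


def is_torch_equal_or_newer(target: str) -> bool:
    # Strip any local-version suffix such as "+cu128" before comparing.
    return Version(torch.__version__.split("+")[0]) >= Version(target)


# With the torch floor at 2.8, every check removed below is a constant True:
assert is_torch_equal_or_newer("2.7.0") and is_torch_equal_or_newer("2.8.0")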
@@ -52,7 +52,7 @@ from vllm.model_executor.layers.quantization.utils.quant_utils import (
 )
 from vllm.platforms import current_platform
 from vllm.triton_utils import tl, triton
-from vllm.utils.torch_utils import direct_register_custom_op, is_torch_equal_or_newer
+from vllm.utils.torch_utils import direct_register_custom_op
 
 logger = init_logger(__name__)
 
@@ -1406,11 +1406,6 @@ direct_register_custom_op(
     op_func=inplace_fused_experts,
     mutates_args=["hidden_states"],
     fake_impl=inplace_fused_experts_fake,
-    tags=(
-        ()
-        if is_torch_equal_or_newer("2.7.0")
-        else (torch.Tag.needs_fixed_stride_order,)
-    ),
 )
 
 
@@ -1501,11 +1496,6 @@ direct_register_custom_op(
     op_name="outplace_fused_experts",
     op_func=outplace_fused_experts,
     fake_impl=outplace_fused_experts_fake,
-    tags=(
-        ()
-        if is_torch_equal_or_newer("2.7.0")
-        else (torch.Tag.needs_fixed_stride_order,)
-    ),
 )
 
 
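The two hunks above stop tagging the fused-experts custom ops with torch.Tag.needs_fixed_stride_order, which was only applied on torch older than 2.7 so the compiler would keep input strides unchanged. A self-contained sketch of where such a tag is attached when an op is defined through torch.library (demo_moe, noop, and the clone kernel are made up for this example and are not vLLM code):

import torch
from torch.library import Library

# Throwaway library fragment for illustration; vLLM wraps this pattern in
# direct_register_custom_op.
demo_lib = Library("demo_moe", "FRAGMENT")

# Tags are passed at schema-definition time; on torch >= 2.8 the fused-experts
# ops no longer pass any.
demo_lib.define(
    "noop(Tensor x) -> Tensor",
    tags=(torch.Tag.needs_fixed_stride_order,),
)
demo_lib.impl("noop", lambda x: x.clone(), "CompositeExplicitAutograd")

out = torch.ops.demo_moe.noop(torch.randn(4))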
@@ -56,7 +56,6 @@ from vllm.scalar_type import scalar_types
 from vllm.utils.flashinfer import has_flashinfer
 from vllm.utils.import_utils import has_triton_kernels
 from vllm.utils.math_utils import round_up
-from vllm.utils.torch_utils import is_torch_equal_or_newer
 
 logger = init_logger(__name__)
 
@@ -89,7 +88,6 @@ def get_mxfp4_backend_with_lora() -> Mxfp4Backend:
     # If FlashInfer is not available, try either Marlin or Triton
     triton_kernels_supported = (
         has_triton_kernels()
-        and is_torch_equal_or_newer("2.8.0")
         # NOTE: triton_kernels are only confirmed to work on SM90 and SM100
         # SM110 fails with this error: https://github.com/vllm-project/vllm/issues/29317
         # SM120 needs this fix: https://github.com/triton-lang/triton/pull/8498

@@ -151,7 +149,6 @@ def get_mxfp4_backend(with_lora_support: bool) -> Mxfp4Backend:
     # If FlashInfer is not available, try either Marlin or Triton
     triton_kernels_supported = (
         has_triton_kernels()
-        and is_torch_equal_or_newer("2.8.0")
         # NOTE: triton_kernels are only confirmed to work on SM90 and SM100
         # SM110 fails with this error: https://github.com/vllm-project/vllm/issues/29317
         # SM120 needs this fix: https://github.com/triton-lang/triton/pull/8498

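Both backend-selection hunks leave triton_kernels_supported depending only on whether the triton_kernels package is installed and on the GPU-architecture notes above. A rough sketch of that remaining condition, with stand-in helpers (not the exact vLLM code):

import importlib.util

import torch


def has_triton_kernels() -> bool:
    # Stand-in for vllm.utils.import_utils.has_triton_kernels: check whether
    # the optional triton_kernels package can be imported.
    return importlib.util.find_spec("triton_kernels") is not None


def triton_mxfp4_supported() -> bool:
    # Kernels installed and the GPU is SM90/SM100; SM110 and SM120 still have
    # the known issues referenced above. The torch-version clause is gone.
    if not torch.cuda.is_available():
        return False
    major, _minor = torch.cuda.get_device_capability()
    return has_triton_kernels() and major in (9, 10)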
@@ -108,20 +108,6 @@ class TorchAOConfig(QuantizationConfig):
         skip_modules: list[str] | None = None,
         is_checkpoint_torchao_serialized: bool = False,
     ) -> None:
-        """
-        # TorchAO quantization relies on tensor subclasses. In order,
-        # to enable proper caching this needs standalone compile
-        if is_torch_equal_or_newer("2.8.0.dev"):
-            os.environ["VLLM_TEST_STANDALONE_COMPILE"] = "1"
-            logger.info(
-                "Using TorchAO: Setting VLLM_TEST_STANDALONE_COMPILE=1")
-
-        # TODO: remove after the torch dependency is updated to 2.8
-        if is_torch_equal_or_newer(
-                "2.7.0") and not is_torch_equal_or_newer("2.8.0.dev"):
-            os.environ["VLLM_DISABLE_COMPILE_CACHE"] = "1"
-            logger.info("Using TorchAO: Setting VLLM_DISABLE_COMPILE_CACHE=1")
-        """
         super().__init__()
         self.torchao_config = torchao_config
         self.skip_modules = skip_modules or []
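The TorchAO hunk drops a block that had already been neutralized into a docstring: version-gated environment toggles for standalone compile and the compile cache. A sketch of what that gate did, with the env-var names taken from the deleted text and everything else illustrative; with torch 2.8 as the floor only the first branch could ever run, so the whole gate (and its is_torch_equal_or_newer import) goes away:

import os

import torch
from packaging.version import Version

_torch_version = Version(torch.__version__.split("+")[0])

if _torch_version >= Version("2.8.0.dev"):
    # TorchAO relies on tensor subclasses; standalone compile is needed for
    # proper compile caching on torch 2.8+.
    os.environ["VLLM_TEST_STANDALONE_COMPILE"] = "1"
elif _torch_version >= Version("2.7.0"):
    # Older workaround for torch 2.7.x: disable the compile cache instead.
    os.environ["VLLM_DISABLE_COMPILE_CACHE"] = "1"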