[ez] Remove checks for torch version <= 2.8 (#33209)
Signed-off-by: angelayi <yiangela7@gmail.com>
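With torch >= 2.8 guaranteed, the is_torch_equal_or_newer("2.7.0") and is_torch_equal_or_newer("2.8.0") gates in the hunks below always evaluate true, so the fallback branches and the helper imports become dead code. A minimal sketch of the gate being deleted, using a simplified stand-in for vllm.utils.torch_utils.is_torch_equal_or_newer (the real helper is more careful about dev and local builds):

# Simplified stand-in, for illustration only.
from packaging.version import Version

import torch


def is_torch_equal_or_newer(target: str) -> bool:
    # Strip any local-version suffix such as "+cu128" before comparing.
    return Version(torch.__version__.split("+")[0]) >= Version(target)


# With the torch floor at 2.8, every check removed below is a constant True:
assert is_torch_equal_or_newer("2.7.0") and is_torch_equal_or_newer("2.8.0")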
@@ -52,7 +52,7 @@ from vllm.model_executor.layers.quantization.utils.quant_utils import (
 )
 from vllm.platforms import current_platform
 from vllm.triton_utils import tl, triton
-from vllm.utils.torch_utils import direct_register_custom_op, is_torch_equal_or_newer
+from vllm.utils.torch_utils import direct_register_custom_op
 
 logger = init_logger(__name__)
 
@@ -1406,11 +1406,6 @@ direct_register_custom_op(
     op_func=inplace_fused_experts,
     mutates_args=["hidden_states"],
     fake_impl=inplace_fused_experts_fake,
-    tags=(
-        ()
-        if is_torch_equal_or_newer("2.7.0")
-        else (torch.Tag.needs_fixed_stride_order,)
-    ),
 )
 
 
@@ -1501,11 +1496,6 @@ direct_register_custom_op(
     op_name="outplace_fused_experts",
     op_func=outplace_fused_experts,
     fake_impl=outplace_fused_experts_fake,
-    tags=(
-        ()
-        if is_torch_equal_or_newer("2.7.0")
-        else (torch.Tag.needs_fixed_stride_order,)
-    ),
 )
 
 
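The two hunks above stop tagging the fused-experts custom ops with torch.Tag.needs_fixed_stride_order, which was only applied on torch older than 2.7 so the compiler would keep input strides unchanged. A self-contained sketch of where such a tag is attached when an op is defined through torch.library (demo_moe, noop, and the clone kernel are made up for this example and are not vLLM code):

import torch
from torch.library import Library

# Throwaway library fragment for illustration; vLLM wraps this pattern in
# direct_register_custom_op.
demo_lib = Library("demo_moe", "FRAGMENT")

# Tags are passed at schema-definition time; on torch >= 2.8 the fused-experts
# ops no longer pass any.
demo_lib.define(
    "noop(Tensor x) -> Tensor",
    tags=(torch.Tag.needs_fixed_stride_order,),
)
demo_lib.impl("noop", lambda x: x.clone(), "CompositeExplicitAutograd")

out = torch.ops.demo_moe.noop(torch.randn(4))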
@@ -56,7 +56,6 @@ from vllm.scalar_type import scalar_types
 from vllm.utils.flashinfer import has_flashinfer
 from vllm.utils.import_utils import has_triton_kernels
 from vllm.utils.math_utils import round_up
-from vllm.utils.torch_utils import is_torch_equal_or_newer
 
 logger = init_logger(__name__)
 
@@ -89,7 +88,6 @@ def get_mxfp4_backend_with_lora() -> Mxfp4Backend:
     # If FlashInfer is not available, try either Marlin or Triton
     triton_kernels_supported = (
         has_triton_kernels()
-        and is_torch_equal_or_newer("2.8.0")
         # NOTE: triton_kernels are only confirmed to work on SM90 and SM100
         # SM110 fails with this error: https://github.com/vllm-project/vllm/issues/29317
         # SM120 needs this fix: https://github.com/triton-lang/triton/pull/8498

@@ -151,7 +149,6 @@ def get_mxfp4_backend(with_lora_support: bool) -> Mxfp4Backend:
     # If FlashInfer is not available, try either Marlin or Triton
     triton_kernels_supported = (
         has_triton_kernels()
-        and is_torch_equal_or_newer("2.8.0")
         # NOTE: triton_kernels are only confirmed to work on SM90 and SM100
         # SM110 fails with this error: https://github.com/vllm-project/vllm/issues/29317
         # SM120 needs this fix: https://github.com/triton-lang/triton/pull/8498

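Both backend-selection hunks leave triton_kernels_supported depending only on whether the triton_kernels package is installed and on the GPU-architecture notes above. A rough sketch of that remaining condition, with stand-in helpers (not the exact vLLM code):

import importlib.util

import torch


def has_triton_kernels() -> bool:
    # Stand-in for vllm.utils.import_utils.has_triton_kernels: check whether
    # the optional triton_kernels package can be imported.
    return importlib.util.find_spec("triton_kernels") is not None


def triton_mxfp4_supported() -> bool:
    # Kernels installed and the GPU is SM90/SM100; SM110 and SM120 still have
    # the known issues referenced above. The torch-version clause is gone.
    if not torch.cuda.is_available():
        return False
    major, _minor = torch.cuda.get_device_capability()
    return has_triton_kernels() and major in (9, 10)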
@@ -108,20 +108,6 @@ class TorchAOConfig(QuantizationConfig):
         skip_modules: list[str] | None = None,
         is_checkpoint_torchao_serialized: bool = False,
     ) -> None:
-        """
-        # TorchAO quantization relies on tensor subclasses. In order,
-        # to enable proper caching this needs standalone compile
-        if is_torch_equal_or_newer("2.8.0.dev"):
-            os.environ["VLLM_TEST_STANDALONE_COMPILE"] = "1"
-            logger.info(
-                "Using TorchAO: Setting VLLM_TEST_STANDALONE_COMPILE=1")
-
-        # TODO: remove after the torch dependency is updated to 2.8
-        if is_torch_equal_or_newer(
-                "2.7.0") and not is_torch_equal_or_newer("2.8.0.dev"):
-            os.environ["VLLM_DISABLE_COMPILE_CACHE"] = "1"
-            logger.info("Using TorchAO: Setting VLLM_DISABLE_COMPILE_CACHE=1")
-        """
         super().__init__()
         self.torchao_config = torchao_config
         self.skip_modules = skip_modules or []
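The TorchAO hunk drops a block that had already been neutralized into a docstring: version-gated environment toggles for standalone compile and the compile cache. A sketch of what that gate did, with the env-var names taken from the deleted text and everything else illustrative; with torch 2.8 as the floor only the first branch could ever run, so the whole gate (and its is_torch_equal_or_newer import) goes away:

import os

import torch
from packaging.version import Version

_torch_version = Version(torch.__version__.split("+")[0])

if _torch_version >= Version("2.8.0.dev"):
    # TorchAO relies on tensor subclasses; standalone compile is needed for
    # proper compile caching on torch 2.8+.
    os.environ["VLLM_TEST_STANDALONE_COMPILE"] = "1"
elif _torch_version >= Version("2.7.0"):
    # Older workaround for torch 2.7.x: disable the compile cache instead.
    os.environ["VLLM_DISABLE_COMPILE_CACHE"] = "1"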