Update to torch==2.6.0 (#12721)

Signed-off-by: mgoin <michael@neuralmagic.com> Signed-off-by: mgoin <mgoin64@gmail.com> Signed-off-by: luka <luka@neuralmagic.com> Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com> Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com> Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk> Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-03-14 16:58:30 -04:00
parent 46f98893dd
commit 14f301b541
9 changed files with 43 additions and 23 deletions
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -52,6 +52,8 @@ if TYPE_CHECKING:
 else:
    QuantizationConfig = None

+from packaging.version import Version
+
 logger = init_logger(__name__)

 # This value is chosen to have a balance between ITL and TTFT. Note it is
@@ -3126,6 +3128,19 @@ class CompilationConfig(BaseModel):
        count_all = self.custom_ops.count("all")
        assert count_none + count_all <= 1, "Can only specify 'none' or 'all'"

+        # TODO(zou3519/luka): Disable auto-functionalization V2 (torch >= 2.6)
+        # unless the user set it explicitly. There are 2 issues with V2:
+        # 1. A bug in PyTorch, fixed in 2.7:
+        #    https://github.com/pytorch/pytorch/issues/147924
+        # 2. Custom passes (fusion) rely on auto-functionalization V1 and don't
+        #    work with V2. Addressing this will take extra engineering effort
+        #    and it is not yet a priority. RFC here:
+        #    https://github.com/vllm-project/vllm/issues/14703
+        if Version(torch.__version__) >= Version("2.6"):
+            KEY = 'enable_auto_functionalized_v2'
+            if KEY not in self.inductor_compile_config:
+                self.inductor_compile_config[KEY] = False
+
        if self.splitting_ops is None:
            if envs.VLLM_USE_V1:
                # v1 must split the graph on attention ops