Update to torch==2.6.0 (#12721)

Signed-off-by: mgoin <michael@neuralmagic.com>
Signed-off-by: mgoin <mgoin64@gmail.com>
Signed-off-by: luka <luka@neuralmagic.com>
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk>
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Michael Goin
2025-03-14 16:58:30 -04:00
committed by GitHub
parent 46f98893dd
commit 14f301b541
9 changed files with 43 additions and 23 deletions

View File

@@ -52,6 +52,8 @@ if TYPE_CHECKING:
else:
QuantizationConfig = None
from packaging.version import Version
logger = init_logger(__name__)
# This value is chosen to have a balance between ITL and TTFT. Note it is
@@ -3126,6 +3128,19 @@ class CompilationConfig(BaseModel):
count_all = self.custom_ops.count("all")
assert count_none + count_all <= 1, "Can only specify 'none' or 'all'"
# TODO(zou3519/luka): On torch >= 2.6, auto-functionalization V2 is turned
# off below unless the user has set it explicitly, because of 2 issues:
# 1. A bug in PyTorch, fixed in 2.7:
# https://github.com/pytorch/pytorch/issues/147924
# 2. Custom passes (fusion) rely on auto-functionalization V1 and don't
# work with V2. Addressing this will take extra engineering effort
# and it is not yet a priority. RFC here:
# https://github.com/vllm-project/vllm/issues/14703
if Version(torch.__version__) >= Version("2.6"):
KEY = 'enable_auto_functionalized_v2'
if KEY not in self.inductor_compile_config:
self.inductor_compile_config[KEY] = False
if self.splitting_ops is None:
if envs.VLLM_USE_V1:
# v1 must split the graph on attention ops