Update to torch==2.6.0 (#12721)
Signed-off-by: mgoin <michael@neuralmagic.com>
Signed-off-by: mgoin <mgoin64@gmail.com>
Signed-off-by: luka <luka@neuralmagic.com>
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk>
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -52,6 +52,8 @@ if TYPE_CHECKING:
|
||||
else:
|
||||
QuantizationConfig = None
|
||||
|
||||
from packaging.version import Version
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
# This value is chosen to have a balance between ITL and TTFT. Note it is
|
||||
@@ -3126,6 +3128,19 @@ class CompilationConfig(BaseModel):
|
||||
count_all = self.custom_ops.count("all")
|
||||
assert count_none + count_all <= 1, "Can only specify 'none' or 'all'"
|
||||
|
||||
# TODO(zou3519/luka): There are 2 issues with auto-functionalization V2:
|
||||
# 1. A bug in PyTorch, fixed in 2.7:
|
||||
# https://github.com/pytorch/pytorch/issues/147924
|
||||
# 2. Custom passes (fusion) rely on auto-functionalization V1 and don't
|
||||
# work with V2. Addressing this will take extra engineering effort
|
||||
# and it is not yet a priority. RFC here:
|
||||
# https://github.com/vllm-project/vllm/issues/14703
|
||||
|
||||
if Version(torch.__version__) >= Version("2.6"):
|
||||
KEY = 'enable_auto_functionalized_v2'
|
||||
if KEY not in self.inductor_compile_config:
|
||||
self.inductor_compile_config[KEY] = False
|
||||
|
||||
if self.splitting_ops is None:
|
||||
if envs.VLLM_USE_V1:
|
||||
# v1 must split the graph on attention ops
|
||||
|
||||
Reference in New Issue
Block a user