[torch.compile] caching of config fields should be opt-out by default (#26468)
Signed-off-by: vnadathur <glvikramn@gmail.com> Signed-off-by: WorldExplored <srreyansh.sethi@gmail.com> Signed-off-by: Srreyansh Sethi <srreyansh.sethi@gmail.com> Signed-off-by: Srreyansh Sethi <107075589+WorldExplored@users.noreply.github.com> Co-authored-by: WorldExplored <srreyansh.sethi@gmail.com> Co-authored-by: Srreyansh Sethi <107075589+worldexplored@users.noreply.github.com> Co-authored-by: vnadathur <236933696+vnadathur@users.noreply.github.com> Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
This commit is contained in:
@@ -1,7 +1,6 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import hashlib
|
||||
import os
|
||||
from typing import TYPE_CHECKING, Any, Literal
|
||||
|
||||
@@ -448,19 +447,41 @@ class ParallelConfig:
|
||||
This hash is also used for DP worker configuration validation
|
||||
to prevent hangs from mismatched collective communication patterns.
|
||||
"""
|
||||
factors: list[Any] = []
|
||||
factors.append(self.pipeline_parallel_size)
|
||||
factors.append(self.tensor_parallel_size)
|
||||
factors.append(self.enable_expert_parallel)
|
||||
factors.append(self.data_parallel_size)
|
||||
factors.append(self.all2all_backend)
|
||||
factors.append(self.enable_eplb)
|
||||
if self.enable_eplb:
|
||||
factors.append(self.eplb_config.log_balancedness)
|
||||
factors.append(self.eplb_config.window_size)
|
||||
factors.append(self.eplb_config.step_interval)
|
||||
factors.append(self.eplb_config.num_redundant_experts)
|
||||
return hashlib.sha256(str(factors).encode()).hexdigest()
|
||||
ignored_factors = {
|
||||
# Derived/runtime topology, networking, or launch details
|
||||
"data_parallel_rank",
|
||||
"data_parallel_rank_local",
|
||||
"data_parallel_backend",
|
||||
"data_parallel_external_lb",
|
||||
"data_parallel_hybrid_lb",
|
||||
"data_parallel_master_ip",
|
||||
"data_parallel_master_port",
|
||||
"_data_parallel_master_port_list",
|
||||
"data_parallel_rpc_port",
|
||||
"rank",
|
||||
"master_addr",
|
||||
"master_port",
|
||||
"node_rank",
|
||||
"nnodes",
|
||||
"max_parallel_loading_workers",
|
||||
"disable_custom_all_reduce",
|
||||
"ray_workers_use_nsight",
|
||||
"ray_runtime_env",
|
||||
"placement_group",
|
||||
"distributed_executor_backend",
|
||||
"worker_cls",
|
||||
"sd_worker_cls",
|
||||
"worker_extension_cls",
|
||||
"_api_process_count",
|
||||
"_api_process_rank",
|
||||
}
|
||||
|
||||
from vllm.config.utils import get_hash_factors, hash_factors
|
||||
|
||||
factors = get_hash_factors(self, ignored_factors)
|
||||
# Explicitly include the backend-affecting env var as a hash factor, as before
|
||||
factors["VLLM_ALL2ALL_BACKEND"] = str(envs.VLLM_ALL2ALL_BACKEND)
|
||||
return hash_factors(factors)
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
# Set all2all_backend from env var if not specified, with deprecation warning
|
||||
|
||||
Reference in New Issue
Block a user