[BugFix] Support online dense model DP without overhead (#30739)

Signed-off-by: Nick Hill <nhill@redhat.com>
Signed-off-by: njhill <nickhill123@gmail.com>
This commit is contained in:
Nick Hill
2026-01-02 07:36:38 -08:00
committed by GitHub
parent 08f425bad1
commit bd877162eb
20 changed files with 345 additions and 146 deletions

View File

@@ -630,7 +630,7 @@ class VllmBackend:
os.makedirs(cache_dir, exist_ok=True)
self.compilation_config.cache_dir = cache_dir
rank = vllm_config.parallel_config.rank
-        dp_rank = vllm_config.parallel_config.data_parallel_rank
+        dp_rank = vllm_config.parallel_config.data_parallel_index
local_cache_dir = os.path.join(cache_dir, f"rank_{rank}_{dp_rank}", self.prefix)
os.makedirs(local_cache_dir, exist_ok=True)
self.compilation_config.local_cache_dir = local_cache_dir

View File

@@ -403,7 +403,7 @@ def _support_torch_compile(
)
rank = self.vllm_config.parallel_config.rank
-        dp_rank = self.vllm_config.parallel_config.data_parallel_rank
+        dp_rank = self.vllm_config.parallel_config.data_parallel_index
cache_dir = os.path.join(cache_dir, f"rank_{rank}_{dp_rank}")
aot_compilation_path = os.path.join(cache_dir, "model")
try: