elastic_ep: Fix issues with repeated scale up/down cycles (#37131)

Signed-off-by: Itay Alroy <ialroy@nvidia.com>
Co-authored-by: Ron Tourgeman <rtourgeman@nvidia.com>
This commit is contained in:
Itay Alroy
2026-03-21 01:13:02 +02:00
committed by GitHub
parent e5ed6c6c13
commit c57d38d603
10 changed files with 129 additions and 90 deletions

View File

@@ -602,13 +602,14 @@ class WorkerProc:
)
# Load model
is_eep_new_worker = envs.VLLM_ELASTIC_EP_SCALE_UP_LAUNCH
if not is_eep_new_worker:
self.worker.init_device()
# Update process title now that parallel groups are initialized
self.setup_proc_title_and_log_prefix(
enable_ep=vllm_config.parallel_config.enable_expert_parallel
)
self.worker.init_device()
# Update process title now that parallel groups are initialized
self.setup_proc_title_and_log_prefix(
enable_ep=vllm_config.parallel_config.enable_expert_parallel
)
if envs.VLLM_ELASTIC_EP_SCALE_UP_LAUNCH:
self.worker.elastic_ep_execute("load_model")
else:
self.worker.load_model()
scheduler_config = vllm_config.scheduler_config