elastic_ep: Fix issues with repeated scale up/down cycles (#37131)
Signed-off-by: Itay Alroy <ialroy@nvidia.com>
Co-authored-by: Ron Tourgeman <rtourgeman@nvidia.com>
This commit is contained in:
@@ -602,13 +602,14 @@ class WorkerProc:
         )
 
         # Load model
-        self.worker.init_device()
-        # Update process title now that parallel groups are initialized
-        self.setup_proc_title_and_log_prefix(
-            enable_ep=vllm_config.parallel_config.enable_expert_parallel
-        )
+        is_eep_new_worker = envs.VLLM_ELASTIC_EP_SCALE_UP_LAUNCH
+        if not is_eep_new_worker:
+            self.worker.init_device()
+            # Update process title now that parallel groups are initialized
+            self.setup_proc_title_and_log_prefix(
+                enable_ep=vllm_config.parallel_config.enable_expert_parallel
+            )
         if envs.VLLM_ELASTIC_EP_SCALE_UP_LAUNCH:
             self.worker.elastic_ep_execute("load_model")
         else:
             self.worker.load_model()
 
         scheduler_config = vllm_config.scheduler_config
||||
Reference in New Issue
Block a user