[BugFix] Fix P/D with non-MoE DP (#33037)
Signed-off-by: Nick Hill <nickhill123@gmail.com>
(cherry picked from commit 0cd259b2d8)
This commit is contained in:
@@ -911,6 +911,17 @@ class EngineCoreProc(EngineCore):
|
||||
set_process_title("EngineCore")
|
||||
decorate_logs()
|
||||
|
||||
if data_parallel and vllm_config.kv_transfer_config is not None:
|
||||
# modify the engine_id and append the local_dp_rank to it to ensure
|
||||
# that the kv_transfer_config is unique for each DP rank.
|
||||
vllm_config.kv_transfer_config.engine_id = (
|
||||
f"{vllm_config.kv_transfer_config.engine_id}_dp{local_dp_rank}"
|
||||
)
|
||||
logger.debug(
|
||||
"Setting kv_transfer_config.engine_id to %s",
|
||||
vllm_config.kv_transfer_config.engine_id,
|
||||
)
|
||||
|
||||
parallel_config.data_parallel_index = dp_rank
|
||||
if data_parallel and vllm_config.model_config.is_moe:
|
||||
# Set data parallel rank for this engine process.
|
||||
@@ -1285,17 +1296,6 @@ class DPEngineCoreProc(EngineCoreProc):
|
||||
assert local_dp_rank is not None
|
||||
assert 0 <= local_dp_rank <= dp_rank < dp_size
|
||||
|
||||
if vllm_config.kv_transfer_config is not None:
|
||||
# modify the engine_id and append the local_dp_rank to it to ensure
|
||||
# that the kv_transfer_config is unique for each DP rank.
|
||||
vllm_config.kv_transfer_config.engine_id = (
|
||||
f"{vllm_config.kv_transfer_config.engine_id}_dp{local_dp_rank}"
|
||||
)
|
||||
logger.debug(
|
||||
"Setting kv_transfer_config.engine_id to %s",
|
||||
vllm_config.kv_transfer_config.engine_id,
|
||||
)
|
||||
|
||||
self.dp_rank = dp_rank
|
||||
self.dp_group = vllm_config.parallel_config.stateless_init_dp_group()
|
||||
|
||||
|
||||
@@ -313,6 +313,13 @@ class CoreEngineActorManager:
|
||||
dp_vllm_config.parallel_config.placement_group = pg
|
||||
local_client = index < local_engine_count
|
||||
|
||||
if dp_size > 1 and dp_vllm_config.kv_transfer_config is not None:
|
||||
# modify the engine_id and append the local_dp_rank to it to ensure
|
||||
# that the kv_transfer_config is unique for each DP rank.
|
||||
dp_vllm_config.kv_transfer_config.engine_id = (
|
||||
f"{dp_vllm_config.kv_transfer_config.engine_id}_dp{local_index}"
|
||||
)
|
||||
|
||||
# Ray XPU known issue: dpctl initializes the GPU runtime early, so
|
||||
# setting device env vars in Ray actor's initialization method
|
||||
# will not affect device selection. See:
|
||||
|
||||
Reference in New Issue
Block a user