diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py index 141e5a459..d5e75824d 100644 --- a/vllm/v1/engine/core.py +++ b/vllm/v1/engine/core.py @@ -911,6 +911,17 @@ class EngineCoreProc(EngineCore): set_process_title("EngineCore") decorate_logs() + if data_parallel and vllm_config.kv_transfer_config is not None: + # modify the engine_id and append the local_dp_rank to it to ensure + # that the kv_transfer_config is unique for each DP rank. + vllm_config.kv_transfer_config.engine_id = ( + f"{vllm_config.kv_transfer_config.engine_id}_dp{local_dp_rank}" + ) + logger.debug( + "Setting kv_transfer_config.engine_id to %s", + vllm_config.kv_transfer_config.engine_id, + ) + parallel_config.data_parallel_index = dp_rank if data_parallel and vllm_config.model_config.is_moe: # Set data parallel rank for this engine process. @@ -1285,17 +1296,6 @@ class DPEngineCoreProc(EngineCoreProc): assert local_dp_rank is not None assert 0 <= local_dp_rank <= dp_rank < dp_size - if vllm_config.kv_transfer_config is not None: - # modify the engine_id and append the local_dp_rank to it to ensure - # that the kv_transfer_config is unique for each DP rank. - vllm_config.kv_transfer_config.engine_id = ( - f"{vllm_config.kv_transfer_config.engine_id}_dp{local_dp_rank}" - ) - logger.debug( - "Setting kv_transfer_config.engine_id to %s", - vllm_config.kv_transfer_config.engine_id, - ) - self.dp_rank = dp_rank self.dp_group = vllm_config.parallel_config.stateless_init_dp_group() diff --git a/vllm/v1/engine/utils.py b/vllm/v1/engine/utils.py index 4056c225c..cae613920 100644 --- a/vllm/v1/engine/utils.py +++ b/vllm/v1/engine/utils.py @@ -313,6 +313,13 @@ class CoreEngineActorManager: dp_vllm_config.parallel_config.placement_group = pg local_client = index < local_engine_count + if dp_size > 1 and dp_vllm_config.kv_transfer_config is not None: + # modify the engine_id and append the local_dp_rank to it to ensure + # that the kv_transfer_config is unique for each DP rank. + dp_vllm_config.kv_transfer_config.engine_id = ( + f"{dp_vllm_config.kv_transfer_config.engine_id}_dp{local_index}" + ) + # Ray XPU known issue: dpctl initializes the GPU runtime early, so # setting device env vars in Ray actor's initialization method # will not affect device selection. See: