[Feature] Prefill Context Parallel (PCP) basic support (#28718)
Signed-off-by: QiuChunshuo <qiuchunshuo@huawei.com>
Signed-off-by: FENP <yuanyongjie.yyj@antgroup.com>
Signed-off-by: LookAround <lixushi@huawei.com>
Signed-off-by: Jingchun Gao <gaojingchun1@huawei.com>
Signed-off-by: zhenwenqi2024 <zhenwenqi_2022@qq.com>
Co-authored-by: FENP <yuanyongjie.yyj@antgroup.com>
Co-authored-by: LookAround <lixushi@huawei.com>
Co-authored-by: Jingchun Gao <gaojingchun1@huawei.com>
Co-authored-by: zhenwenqi2024 <zhenwenqi_2022@qq.com>
Co-authored-by: Jingchun Gao <63247409+gjc0824@users.noreply.github.com>
This commit is contained in:
@@ -35,6 +35,7 @@ from vllm.distributed.parallel_state import (
|
||||
get_dp_group,
|
||||
get_ep_group,
|
||||
get_inner_dp_world_group,
|
||||
get_pcp_group,
|
||||
get_pp_group,
|
||||
get_tp_group,
|
||||
)
|
||||
@@ -110,12 +111,14 @@ class MultiprocExecutor(Executor):
|
||||
f"({self.parallel_config.nnodes_within_dp}). "
|
||||
)
|
||||
self.local_world_size = self.parallel_config.local_world_size
|
||||
tensor_parallel_size = self.parallel_config.tensor_parallel_size
|
||||
pp_parallel_size = self.parallel_config.pipeline_parallel_size
|
||||
assert self.world_size == tensor_parallel_size * pp_parallel_size, (
|
||||
tp_size = self.parallel_config.tensor_parallel_size
|
||||
pp_size = self.parallel_config.pipeline_parallel_size
|
||||
pcp_size = self.parallel_config.prefill_context_parallel_size
|
||||
assert self.world_size == tp_size * pp_size * pcp_size, (
|
||||
f"world_size ({self.world_size}) must be equal to the "
|
||||
f"tensor_parallel_size ({tensor_parallel_size}) x pipeline"
|
||||
f"_parallel_size ({pp_parallel_size}). "
|
||||
f"tensor_parallel_size ({tp_size}) x pipeline"
|
||||
f"_parallel_size ({pp_size}) x prefill_context"
|
||||
f"_parallel_size ({pcp_size}). "
|
||||
)
|
||||
|
||||
# Set multiprocessing envs
|
||||
@@ -424,7 +427,11 @@ class MultiprocExecutor(Executor):
|
||||
# 16-23, PP rank 2
|
||||
# 24-31, PP rank 3
|
||||
# so (with PCP=1) world_size - tp_size * pcp_size = 32 - 8 * 1 = 24
# should be PP rank = -1 (i.e. 3)
|
||||
return self.world_size - self.parallel_config.tensor_parallel_size
|
||||
return (
|
||||
self.world_size
|
||||
- self.parallel_config.tensor_parallel_size
|
||||
* self.parallel_config.prefill_context_parallel_size
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -828,6 +835,8 @@ class WorkerProc:
|
||||
dp_rank = get_dp_group().rank_in_group
|
||||
pp_size = get_pp_group().world_size
|
||||
pp_rank = get_pp_group().rank_in_group
|
||||
pcp_size = get_pcp_group().world_size
|
||||
pcp_rank = get_pcp_group().rank_in_group
|
||||
tp_size = get_tp_group().world_size
|
||||
tp_rank = get_tp_group().rank_in_group
|
||||
dcp_size = get_dcp_group().world_size
|
||||
@@ -837,6 +846,8 @@ class WorkerProc:
|
||||
process_name += f"_DP{dp_rank}"
|
||||
if pp_size > 1:
|
||||
process_name += f"_PP{pp_rank}"
|
||||
if pcp_size > 1:
|
||||
process_name += f"_PCP{pcp_rank}"
|
||||
if tp_size > 1:
|
||||
process_name += f"_TP{tp_rank}"
|
||||
if dcp_size > 1:
|
||||
|
||||
Reference in New Issue
Block a user