[Feature] Prefill Context Parallel (PCP) basic support (#28718)

Signed-off-by: QiuChunshuo <qiuchunshuo@huawei.com> Signed-off-by: FENP <yuanyongjie.yyj@antgroup.com> Signed-off-by: LookAround <lixushi@huawei.com> Signed-off-by: Jingchun Gao <gaojingchun1@huawei.com> Signed-off-by: zhenwenqi2024 <zhenwenqi_2022@qq.com> Co-authored-by: FENP <yuanyongjie.yyj@antgroup.com> Co-authored-by: LookAround <lixushi@huawei.com> Co-authored-by: Jingchun Gao <gaojingchun1@huawei.com> Co-authored-by: zhenwenqi2024 <zhenwenqi_2022@qq.com> Co-authored-by: Jingchun Gao <63247409+gjc0824@users.noreply.github.com>
2025-11-20 04:52:44 +08:00
parent 02f5903b84
commit 2fd893b4ce
27 changed files with 399 additions and 114 deletions
--- a/vllm/v1/worker/gpu_worker.py
+++ b/vllm/v1/worker/gpu_worker.py
@@ -26,6 +26,7 @@ from vllm.distributed.kv_transfer import (
    has_kv_transfer_group,
 )
 from vllm.distributed.parallel_state import (
+    get_pcp_group,
    get_pp_group,
    get_tp_group,
 )
@@ -733,6 +734,7 @@ class Worker(WorkerBase):
                module.global_num_experts = module.moe_config.num_experts
                module.moe_parallel_config = FusedMoEParallelConfig.make(
                    tp_size_=get_tp_group().world_size,
+                    pcp_size_=get_pcp_group().world_size,
                    dp_size_=get_dp_group().world_size,
                    vllm_parallel_config=parallel_config,
                )
@@ -886,6 +888,7 @@ def init_worker_distributed_environment(
    ensure_model_parallel_initialized(
        parallel_config.tensor_parallel_size,
        parallel_config.pipeline_parallel_size,
+        parallel_config.prefill_context_parallel_size,
        parallel_config.decode_context_parallel_size,
    )