[Feature] Prefill Context Parallel (PCP) basic support (#28718)

Signed-off-by: QiuChunshuo <qiuchunshuo@huawei.com>
Signed-off-by: FENP <yuanyongjie.yyj@antgroup.com>
Signed-off-by: LookAround <lixushi@huawei.com>
Signed-off-by: Jingchun Gao <gaojingchun1@huawei.com>
Signed-off-by: zhenwenqi2024 <zhenwenqi_2022@qq.com>
Co-authored-by: FENP <yuanyongjie.yyj@antgroup.com>
Co-authored-by: LookAround <lixushi@huawei.com>
Co-authored-by: Jingchun Gao <gaojingchun1@huawei.com>
Co-authored-by: zhenwenqi2024 <zhenwenqi_2022@qq.com>
Co-authored-by: Jingchun Gao <63247409+gjc0824@users.noreply.github.com>
2025-11-20 04:52:44 +08:00
parent 02f5903b84
commit 2fd893b4ce
27 changed files with 399 additions and 114 deletions
--- a/tests/v1/worker/test_gpu_model_runner.py
+++ b/tests/v1/worker/test_gpu_model_runner.py
@@ -956,7 +956,7 @@ def test_hybrid_block_table_initialization():
    max_num_reqs = 10
    max_num_blocks_per_req = 20
    max_num_batched_tokens = 512
-    dcp_kv_cache_interleave_size = 8
+    cp_kv_cache_interleave_size = 8

    block_table = BlockTable(
        block_size=block_size,
@@ -966,7 +966,7 @@ def test_hybrid_block_table_initialization():
        pin_memory=False,
        device=torch.device(DEVICE),
        kernel_block_size=kernel_block_sizes[0],
-        dcp_kv_cache_interleave_size=dcp_kv_cache_interleave_size,
+        cp_kv_cache_interleave_size=cp_kv_cache_interleave_size,
    )

    # Verify hybrid block configuration