[Feature] Prefill Context Parallel (PCP) basic support (#28718)
Signed-off-by: QiuChunshuo <qiuchunshuo@huawei.com>
Signed-off-by: FENP <yuanyongjie.yyj@antgroup.com>
Signed-off-by: LookAround <lixushi@huawei.com>
Signed-off-by: Jingchun Gao <gaojingchun1@huawei.com>
Signed-off-by: zhenwenqi2024 <zhenwenqi_2022@qq.com>
Co-authored-by: FENP <yuanyongjie.yyj@antgroup.com>
Co-authored-by: LookAround <lixushi@huawei.com>
Co-authored-by: Jingchun Gao <gaojingchun1@huawei.com>
Co-authored-by: zhenwenqi2024 <zhenwenqi_2022@qq.com>
Co-authored-by: Jingchun Gao <63247409+gjc0824@users.noreply.github.com>
This commit is contained in:
@@ -956,7 +956,7 @@ def test_hybrid_block_table_initialization():
|
||||
max_num_reqs = 10
|
||||
max_num_blocks_per_req = 20
|
||||
max_num_batched_tokens = 512
|
||||
dcp_kv_cache_interleave_size = 8
|
||||
cp_kv_cache_interleave_size = 8
|
||||
|
||||
block_table = BlockTable(
|
||||
block_size=block_size,
|
||||
@@ -966,7 +966,7 @@ def test_hybrid_block_table_initialization():
|
||||
pin_memory=False,
|
||||
device=torch.device(DEVICE),
|
||||
kernel_block_size=kernel_block_sizes[0],
|
||||
dcp_kv_cache_interleave_size=dcp_kv_cache_interleave_size,
|
||||
cp_kv_cache_interleave_size=cp_kv_cache_interleave_size,
|
||||
)
|
||||
|
||||
# Verify hybrid block configuration
|
||||
|
||||
Reference in New Issue
Block a user