[Feature] Prefill Context Parallel (PCP) basic support (#28718)
Signed-off-by: QiuChunshuo <qiuchunshuo@huawei.com>
Signed-off-by: FENP <yuanyongjie.yyj@antgroup.com>
Signed-off-by: LookAround <lixushi@huawei.com>
Signed-off-by: Jingchun Gao <gaojingchun1@huawei.com>
Signed-off-by: zhenwenqi2024 <zhenwenqi_2022@qq.com>
Co-authored-by: FENP <yuanyongjie.yyj@antgroup.com>
Co-authored-by: LookAround <lixushi@huawei.com>
Co-authored-by: Jingchun Gao <gaojingchun1@huawei.com>
Co-authored-by: zhenwenqi2024 <zhenwenqi_2022@qq.com>
Co-authored-by: Jingchun Gao <63247409+gjc0824@users.noreply.github.com>
This commit is contained in:
@@ -128,6 +128,7 @@ class EngineCore:
|
||||
scheduler_block_size = (
|
||||
vllm_config.cache_config.block_size
|
||||
* vllm_config.parallel_config.decode_context_parallel_size
|
||||
* vllm_config.parallel_config.prefill_context_parallel_size
|
||||
)
|
||||
|
||||
self.scheduler: SchedulerInterface = Scheduler(
|
||||
|
||||
Reference in New Issue
Block a user