[Feature] Prefill Context Parallel (PCP) basic support (#28718)
Signed-off-by: QiuChunshuo <qiuchunshuo@huawei.com>
Signed-off-by: FENP <yuanyongjie.yyj@antgroup.com>
Signed-off-by: LookAround <lixushi@huawei.com>
Signed-off-by: Jingchun Gao <gaojingchun1@huawei.com>
Signed-off-by: zhenwenqi2024 <zhenwenqi_2022@qq.com>
Co-authored-by: FENP <yuanyongjie.yyj@antgroup.com>
Co-authored-by: LookAround <lixushi@huawei.com>
Co-authored-by: Jingchun Gao <gaojingchun1@huawei.com>
Co-authored-by: zhenwenqi2024 <zhenwenqi_2022@qq.com>
Co-authored-by: Jingchun Gao <63247409+gjc0824@users.noreply.github.com>
This commit is contained in:
@@ -31,7 +31,7 @@ class ParallelSetup(NamedTuple):
 tp_size: int
 pp_size: int
 dcp_size: int
-dcp_kv_cache_interleave_size: int
+cp_kv_cache_interleave_size: int
 eager_mode: bool
 chunked_prefill: bool
 
@@ -55,7 +55,7 @@ class CPTestSettings:
 tp_base: int = 4,
 pp_base: int = 1,
 dcp_base: int = 1,
-dcp_kv_cache_interleave_size: int = 1,
+cp_kv_cache_interleave_size: int = 1,
 multi_node_only: bool = False,
 runner: RunnerOption = "auto",
 load_format: str | None = None,
@@ -71,7 +71,7 @@ class CPTestSettings:
 tp_size=tp_base,
 pp_size=pp_multiplier * pp_base,
 dcp_size=int(dcp_multiplier * tp_base),
-dcp_kv_cache_interleave_size=dcp_kv_cache_interleave_size,
+cp_kv_cache_interleave_size=cp_kv_cache_interleave_size,
 eager_mode=eager_mode_val,
 chunked_prefill=chunked_prefill_val,
 )
@@ -116,7 +116,7 @@ def _compare_cp_with_tp(
 tp_size,
 pp_size,
 dcp_size,
-dcp_kv_cache_interleave_size,
+cp_kv_cache_interleave_size,
 eager_mode,
 chunked_prefill,
 ) = parallel_setup
@@ -197,7 +197,7 @@ def _compare_cp_with_tp(
 "--decode-context-parallel-size",
 str(dcp_size),
 "--dcp-kv-cache-interleave-size",
-str(dcp_kv_cache_interleave_size),
+str(cp_kv_cache_interleave_size),
 "--distributed-executor-backend",
 distributed_backend,
 ]
@@ -227,7 +227,7 @@ CP_TEXT_GENERATION_MODELS = {
 "deepseek-ai/DeepSeek-V2-Lite-Chat": [
 CPTestSettings.detailed(),
 CPTestSettings.detailed(tp_base=2),
-CPTestSettings.detailed(tp_base=2, dcp_kv_cache_interleave_size=64),
+CPTestSettings.detailed(tp_base=2, cp_kv_cache_interleave_size=64),
 ],
 "bigcode/gpt_bigcode-santacoder": [
 CPTestSettings.detailed(),
Reference in New Issue
Block a user