[DCP] Support Decode Context Parallel (DCP) for GQA with FlashAttention (#24864)

Signed-off-by: yuanyongjie.yyj <yuanyongjie.yyj@antgroup.com>
Signed-off-by: FENP <32334296+FENP@users.noreply.github.com>
Signed-off-by: Jaya Yuan <yuanyongjie.yyj@antgroup.com>
This commit is contained in:
Jaya Yuan
2025-10-14 21:07:50 +08:00
committed by GitHub
parent fdd32750f0
commit ea97940d6c
7 changed files with 209 additions and 33 deletions

View File

@@ -204,17 +204,21 @@ def _compare_cp_with_tp(
# Models exercised by the detailed context-parallel text-generation tests,
# mapped to the CPTestSettings variants to run for each (default TP base and
# tp_base=2).
CP_TEXT_GENERATION_MODELS = {
    # [MLA attention]
    "deepseek-ai/DeepSeek-V2-Lite-Chat": [
        CPTestSettings.detailed(),
        CPTestSettings.detailed(tp_base=2),
    ],
    # [GQA attention] — NOTE(review): this entry was added by the DCP-for-GQA
    # change, so the old blanket "[MLA attention only]" header no longer
    # described every entry; split into per-attention-family headers.
    "bigcode/gpt_bigcode-santacoder": [
        CPTestSettings.detailed(),
        CPTestSettings.detailed(tp_base=2),
    ],
}
# Model IDs covered by the context-parallel test sweep.
# TODO support other models
CP_TEST_MODELS = [
    # [LANGUAGE GENERATION]
    "deepseek-ai/DeepSeek-V2-Lite-Chat",  # MLA-style attention model
    "bigcode/gpt_bigcode-santacoder",  # added with GQA DCP support — confirm
]