[Core] Encoder separation for Encode-Prefill-Decode Disaggregation (#25233)

Signed-off-by: n00909098 <nguyen.kha.long@huawei.com>
Signed-off-by: knlnguyen1802 <knlnguyen1802@gmail.com>
Signed-off-by: herotai214 <herotai214@gmail.com>
Signed-off-by: Khuong Le <khuong.le.manh@huawei.com>
Signed-off-by: Khuong Le <lemanhkhuong2611@gmail.com>
Co-authored-by: n00909098 <nguyen.kha.long@huawei.com>
Co-authored-by: knlnguyen1802 <knlnguyen1802@gmail.com>
Co-authored-by: herotai214 <herotai214@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Khuong Le <khuong.le.manh@huawei.com>
Co-authored-by: Khuong Le <lemanhkhuong2611@gmail.com>
This commit is contained in:
Chenguang Zheng
2025-11-12 10:58:33 +08:00
committed by GitHub
parent cbb799e314
commit 4ccffe561f
31 changed files with 5026 additions and 42 deletions

View File

@@ -20,6 +20,7 @@ from vllm.distributed import (
init_distributed_environment,
set_custom_all_reduce,
)
from vllm.distributed.ec_transfer import ensure_ec_transfer_initialized
from vllm.distributed.kv_transfer import (
ensure_kv_transfer_initialized,
get_kv_transfer_group,
@@ -887,3 +888,7 @@ def init_worker_distributed_environment(
parallel_config.pipeline_parallel_size,
parallel_config.decode_context_parallel_size,
)
# Init ec connector here before KV caches caches init
# NOTE: We do not init KV caches for Encoder-only instance in EPD disagg mode
ensure_ec_transfer_initialized(vllm_config)