[Core] Implement disagg prefill by StatelessProcessGroup (#10502)

This PR provides initial support for single-node disaggregated prefill in the 1P1D (one prefill instance, one decode instance) scenario.
Signed-off-by: KuntaiDu <kuntai@uchicago.edu>
Co-authored-by: ApostaC <yihua98@uchicago.edu>
Co-authored-by: YaoJiayi <120040070@link.cuhk.edu.cn>
This commit is contained in:
Kuntai Du
2024-12-01 19:01:00 -06:00
committed by GitHub
parent c11f172187
commit 0590ec3fd9
33 changed files with 2525 additions and 21 deletions

View File

@@ -9,10 +9,10 @@ import torch
import vllm.envs as envs
from vllm.config import (CacheConfig, CompilationConfig, ConfigFormat,
DecodingConfig, DeviceConfig, HfOverrides, LoadConfig,
LoadFormat, LoRAConfig, ModelConfig,
ObservabilityConfig, ParallelConfig, PoolerConfig,
PromptAdapterConfig, SchedulerConfig,
DecodingConfig, DeviceConfig, HfOverrides,
KVTransferConfig, LoadConfig, LoadFormat, LoRAConfig,
ModelConfig, ObservabilityConfig, ParallelConfig,
PoolerConfig, PromptAdapterConfig, SchedulerConfig,
SpeculativeConfig, TaskOption, TokenizerPoolConfig,
VllmConfig)
from vllm.executor.executor_base import ExecutorBase
@@ -108,6 +108,7 @@ class EngineArgs:
# notice.
distributed_executor_backend: Optional[Union[str,
Type[ExecutorBase]]] = None
# number of P/D disaggregation (or other disaggregation) workers
pipeline_parallel_size: int = 1
tensor_parallel_size: int = 1
max_parallel_loading_workers: Optional[int] = None
@@ -194,6 +195,8 @@ class EngineArgs:
compilation_config: Optional[CompilationConfig] = None
worker_cls: str = "auto"
kv_transfer_config: Optional[KVTransferConfig] = None
def __post_init__(self):
if not self.tokenizer:
self.tokenizer = self.model
@@ -908,6 +911,12 @@ class EngineArgs:
'compilers, using -O without space is also '
'supported. -O3 is equivalent to -O 3.')
parser.add_argument('--kv-transfer-config',
type=KVTransferConfig.from_cli,
default=None,
help='The configurations for distributed KV cache '
'transfer. Should be a JSON string.')
parser.add_argument(
'--worker-cls',
type=str,
@@ -1201,6 +1210,7 @@ class EngineArgs:
observability_config=observability_config,
prompt_adapter_config=prompt_adapter_config,
compilation_config=self.compilation_config,
kv_transfer_config=self.kv_transfer_config,
)
if envs.VLLM_USE_V1: