[V1] Multiprocessing Tensor Parallel Support for v1 (#9856)

Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
This commit is contained in:
Tyler Michael Smith
2024-12-10 01:28:14 -05:00
committed by GitHub
parent bc192a2b09
commit 28b3a1c7e5
21 changed files with 732 additions and 145 deletions

View File

@@ -20,7 +20,7 @@ from vllm.usage.usage_lib import UsageContext
from vllm.v1.engine.core_client import EngineCoreClient
from vllm.v1.engine.detokenizer import Detokenizer
from vllm.v1.engine.processor import Processor
from vllm.v1.executor.gpu_executor import GPUExecutor
from vllm.v1.executor.abstract import Executor
logger = init_logger(__name__)
@@ -33,7 +33,7 @@ class LLMEngine:
def __init__(
self,
vllm_config: VllmConfig,
executor_class: Type[GPUExecutor],
executor_class: Type[Executor],
log_stats: bool,
usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
stat_loggers: Optional[Dict[str, StatLoggerBase]] = None,
@@ -104,10 +104,17 @@ class LLMEngine:
@classmethod
def _get_executor_cls(cls, vllm_config: VllmConfig):
    """Pick the v1 executor implementation for this engine.

    Selects between the multiprocessing tensor-parallel executor
    (``distributed_executor_backend == "mp"``) and the single-process
    executor (backend unset). Any other backend value is rejected.

    Args:
        vllm_config: Full engine configuration; only
            ``parallel_config.distributed_executor_backend`` is read.

    Returns:
        The executor class (not an instance) to construct the engine with.
    """
    distributed_executor_backend = (
        vllm_config.parallel_config.distributed_executor_backend)
    if distributed_executor_backend == "mp":
        # Multiprocessing-based tensor-parallel executor.
        from vllm.v1.executor.multiproc_executor import MultiprocExecutor
        executor_class = MultiprocExecutor
    else:
        # v1 only supports "mp" or the default (None) backend here.
        assert (distributed_executor_backend is None)
        from vllm.v1.executor.uniproc_executor import UniprocExecutor
        executor_class = UniprocExecutor
    return executor_class
def get_num_unfinished_requests(self) -> int:
    """Return how many requests are still in flight.

    Thin delegation: the detokenizer owns the per-request state, so the
    count is read straight from it.
    """
    detokenizer = self.detokenizer
    return detokenizer.get_num_unfinished_requests()