[V1] Multiprocessing Tensor Parallel Support for v1 (#9856)
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
This commit is contained in:
Committed by: GitHub
Parent: bc192a2b09
Commit: 28b3a1c7e5
@@ -20,7 +20,7 @@ from vllm.usage.usage_lib import UsageContext
|
||||
from vllm.v1.engine.core_client import EngineCoreClient
|
||||
from vllm.v1.engine.detokenizer import Detokenizer
|
||||
from vllm.v1.engine.processor import Processor
|
||||
-from vllm.v1.executor.gpu_executor import GPUExecutor
|
||||
+from vllm.v1.executor.abstract import Executor
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
@@ -33,7 +33,7 @@ class LLMEngine:
|
||||
def __init__(
|
||||
self,
|
||||
vllm_config: VllmConfig,
|
||||
-executor_class: Type[GPUExecutor],
|
||||
+executor_class: Type[Executor],
|
||||
log_stats: bool,
|
||||
usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
|
||||
stat_loggers: Optional[Dict[str, StatLoggerBase]] = None,
|
||||
@@ -104,10 +104,17 @@ class LLMEngine:
|
||||
|
||||
@classmethod
|
||||
def _get_executor_cls(cls, vllm_config: VllmConfig):
|
||||
-return GPUExecutor
|
||||
+distributed_executor_backend = (
|
||||
+vllm_config.parallel_config.distributed_executor_backend)
|
||||
+if distributed_executor_backend == "mp":
|
||||
+from vllm.v1.executor.multiproc_executor import MultiprocExecutor
|
||||
+executor_class = MultiprocExecutor
|
||||
+else:
|
||||
+assert (distributed_executor_backend is None)
|
||||
+from vllm.v1.executor.uniproc_executor import UniprocExecutor
|
||||
+executor_class = UniprocExecutor
|
||||
|
||||
-def stop_remote_worker_execution_loop(self) -> None:
|
||||
-raise NotImplementedError("TP not implemented yet.")
|
||||
+return executor_class
|
||||
|
||||
def get_num_unfinished_requests(self) -> int:
|
||||
return self.detokenizer.get_num_unfinished_requests()
|
||||
|
||||
Reference in New Issue
Block a user