[2/N] executor pass the complete config to worker/modelrunner (#9938)

Signed-off-by: youkaichao <youkaichao@gmail.com>
Co-authored-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
youkaichao
2024-11-02 07:35:05 -07:00
committed by GitHub
parent 1d4cfe2be1
commit e893795443
44 changed files with 249 additions and 579 deletions

View File

@@ -13,8 +13,9 @@ import torch
 from typing_extensions import TypeIs, TypeVar
 import vllm.envs as envs
-from vllm.config import (DecodingConfig, EngineConfig, LoRAConfig, ModelConfig,
-                         ObservabilityConfig, ParallelConfig, SchedulerConfig)
+from vllm.config import (DecodingConfig, LoRAConfig, ModelConfig,
+                         ObservabilityConfig, ParallelConfig, SchedulerConfig,
+                         VllmConfig)
 from vllm.core.scheduler import (ScheduledSequenceGroup, Scheduler,
                                  SchedulerOutputs)
 from vllm.engine.arg_utils import EngineArgs
@@ -219,7 +220,7 @@ class LLMEngine:
     def __init__(
         self,
-        vllm_config: EngineConfig,
+        vllm_config: VllmConfig,
         executor_class: Type[ExecutorBase],
         log_stats: bool,
         usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
@@ -500,7 +501,7 @@ class LLMEngine:
     @classmethod
     def _get_executor_cls(cls,
-                          engine_config: EngineConfig) -> Type[ExecutorBase]:
+                          engine_config: VllmConfig) -> Type[ExecutorBase]:
         distributed_executor_backend = (
             engine_config.parallel_config.distributed_executor_backend)
         # Initialize the cluster and specify the executor class.