[V1] V1 Enablement Oracle (#13726)
Signed-off-by: rshaw@neuralmagic.com <rshaw@neuralmagic.com>
Co-authored-by: rshaw@neuralmagic.com <rshaw@neuralmagic.com>
Co-authored-by: Nicolò Lucchesi <nlucches@redhat.com>
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com>
Co-authored-by: Michael Goin <michael@neuralmagic.com>
This commit is contained in:
@@ -216,6 +216,12 @@ class LLMEngine:
|
||||
mm_registry: MultiModalRegistry = MULTIMODAL_REGISTRY,
|
||||
use_cached_outputs: bool = False,
|
||||
) -> None:
|
||||
if envs.VLLM_USE_V1:
|
||||
raise ValueError(
|
||||
"Using V0 LLMEngine, but envs.VLLM_USE_V1=True. "
|
||||
"This should not happen. As a workaround, try using "
|
||||
"LLMEngine.from_vllm_config(...) or explicitly set "
|
||||
"VLLM_USE_V1=0 or 1 and report this issue on Github.")
|
||||
|
||||
self.vllm_config = vllm_config
|
||||
self.model_config = vllm_config.model_config
|
||||
@@ -479,6 +485,22 @@ class LLMEngine:
|
||||
f"{distributed_executor_backend}")
|
||||
return executor_class
|
||||
|
||||
@classmethod
def from_vllm_config(
    cls,
    vllm_config: VllmConfig,
    usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
    stat_loggers: Optional[Dict[str, StatLoggerBase]] = None,
    disable_log_stats: bool = False,
) -> "LLMEngine":
    """Build an engine from an already-constructed ``VllmConfig``.

    The executor class is obtained from ``cls._get_executor_cls`` using
    the given config, and ``disable_log_stats`` is inverted to produce the
    ``log_stats`` flag passed to the constructor. ``usage_context`` and
    ``stat_loggers`` are forwarded unchanged.
    """
    # Resolve the executor first, then derive the logging flag, mirroring
    # the order in which the constructor arguments are evaluated.
    executor_class = cls._get_executor_cls(vllm_config)
    log_stats = not disable_log_stats

    return cls(
        vllm_config=vllm_config,
        executor_class=executor_class,
        log_stats=log_stats,
        usage_context=usage_context,
        stat_loggers=stat_loggers,
    )
|
||||
|
||||
@classmethod
|
||||
def from_engine_args(
|
||||
cls,
|
||||
@@ -488,19 +510,20 @@ class LLMEngine:
|
||||
) -> "LLMEngine":
|
||||
"""Creates an LLM engine from the engine arguments."""
|
||||
# Create the engine configs.
|
||||
engine_config = engine_args.create_engine_config(usage_context)
|
||||
executor_class = cls._get_executor_cls(engine_config)
|
||||
# Create the LLM engine.
|
||||
engine = cls(
|
||||
vllm_config=engine_config,
|
||||
executor_class=executor_class,
|
||||
log_stats=not engine_args.disable_log_stats,
|
||||
vllm_config = engine_args.create_engine_config(usage_context)
|
||||
|
||||
engine_cls = cls
|
||||
if envs.VLLM_USE_V1:
|
||||
from vllm.v1.engine.llm_engine import LLMEngine as V1LLMEngine
|
||||
engine_cls = V1LLMEngine
|
||||
|
||||
return engine_cls.from_vllm_config(
|
||||
vllm_config=vllm_config,
|
||||
usage_context=usage_context,
|
||||
stat_loggers=stat_loggers,
|
||||
disable_log_stats=engine_args.disable_log_stats,
|
||||
)
|
||||
|
||||
return engine
|
||||
|
||||
def __reduce__(self):
|
||||
# This is to ensure that the LLMEngine is not referenced in
|
||||
# the closure used to initialize Ray worker actors
|
||||
@@ -2097,6 +2120,6 @@ class LLMEngine:
|
||||
return sampling_params
|
||||
|
||||
|
||||
# TODO(v1): Remove this class proxy when V1 goes default.
|
||||
if envs.VLLM_USE_V1:
|
||||
from vllm.v1.engine.llm_engine import LLMEngine # type: ignore
|
||||
if envs.is_set("VLLM_USE_V1") and envs.VLLM_USE_V1:
|
||||
from vllm.v1.engine.llm_engine import LLMEngine as V1LLMEngine
|
||||
LLMEngine = V1LLMEngine # type: ignore
|
||||
|
||||
Reference in New Issue
Block a user