[Deprecation] Remove inputs arg fallback in Engine classes (#18799)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
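
For callers that still pass the removed keyword, the migration is a pure rename of inputs= to prompt=. Below is a minimal before/after sketch against the public engine API (LLMEngine.from_engine_args, EngineArgs, SamplingParams, add_request, step); the model name and request id are arbitrary examples:

    from vllm import EngineArgs, SamplingParams
    from vllm.engine.llm_engine import LLMEngine

    engine = LLMEngine.from_engine_args(EngineArgs(model="facebook/opt-125m"))
    params = SamplingParams(temperature=0.8, max_tokens=32)

    # Before this commit, inputs="..." still worked but emitted a
    # DeprecationWarning; after it, the keyword no longer exists and
    # Python raises TypeError. Use prompt= instead:
    engine.add_request(request_id="0", prompt="Hello, my name is", params=params)

    # Drive the engine loop until the request completes.
    while engine.has_unfinished_requests():
        for output in engine.step():
            if output.finished:
                print(output.outputs[0].text)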
@@ -11,10 +11,10 @@ from functools import partial
 from typing import (TYPE_CHECKING, Any, Callable, ClassVar, Deque, Dict,
                     Iterable, List, Literal, Mapping, NamedTuple, Optional)
 from typing import Sequence as GenericSequence
-from typing import Set, Type, Union, cast, overload
+from typing import Set, Type, Union, cast
 
 import torch
-from typing_extensions import TypeVar, deprecated
+from typing_extensions import TypeVar
 
 import vllm.envs as envs
 from vllm.config import (DecodingConfig, LoRAConfig, ModelConfig,
@@ -58,8 +58,7 @@ from vllm.transformers_utils.tokenizer_group import (
     TokenizerGroup, init_tokenizer_from_configs)
 from vllm.usage.usage_lib import (UsageContext, is_usage_stats_enabled,
                                   usage_message)
-from vllm.utils import (Counter, Device, deprecate_kwargs,
-                        resolve_obj_by_qualname, weak_bind)
+from vllm.utils import Counter, Device, resolve_obj_by_qualname, weak_bind
 from vllm.version import __version__ as VLLM_VERSION
 from vllm.worker.model_runner_base import InputProcessingError
 
@@ -629,7 +628,6 @@ class LLMEngine:
     def stop_remote_worker_execution_loop(self) -> None:
         self.model_executor.stop_remote_worker_execution_loop()
 
-    @overload
     def add_request(
         self,
         request_id: str,
@@ -641,42 +639,6 @@ class LLMEngine:
         trace_headers: Optional[Mapping[str, str]] = None,
         prompt_adapter_request: Optional[PromptAdapterRequest] = None,
         priority: int = 0,
     ) -> None:
-        ...
-
-    @overload
-    @deprecated("'inputs' will be renamed to 'prompt")
-    def add_request(
-        self,
-        request_id: str,
-        *,
-        inputs: PromptType,
-        params: Union[SamplingParams, PoolingParams],
-        arrival_time: Optional[float] = None,
-        lora_request: Optional[LoRARequest] = None,
-        trace_headers: Optional[Mapping[str, str]] = None,
-        prompt_adapter_request: Optional[PromptAdapterRequest] = None,
-        priority: int = 0,
-    ) -> None:
-        ...
-
-    @deprecate_kwargs(
-        "inputs",
-        additional_message="Please use the 'prompt' parameter instead.",
-    )
-    def add_request(
-        self,
-        request_id: str,
-        prompt: Optional[PromptType] = None,
-        params: Optional[Union[SamplingParams, PoolingParams]] = None,
-        arrival_time: Optional[float] = None,
-        lora_request: Optional[LoRARequest] = None,
-        tokenization_kwargs: Optional[dict[str, Any]] = None,
-        trace_headers: Optional[Mapping[str, str]] = None,
-        prompt_adapter_request: Optional[PromptAdapterRequest] = None,
-        priority: int = 0,
-        *,
-        inputs: Optional[PromptType] = None,  # DEPRECATED
-    ) -> None:
         """Add a request to the engine's request pool.
 
@@ -725,10 +687,6 @@ class LLMEngine:
         >>> # continue the request processing
         >>> ...
         """
-        if inputs is not None:
-            prompt = inputs
-        assert prompt is not None and params is not None
-
         if lora_request is not None and not self.lora_config:
             raise ValueError(f"Got lora_request {lora_request} but LoRA is "
                              "not enabled!")
 
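With the fallback gone, stale callers fail fast with a TypeError instead of a warning. For context on what was dropped: deprecate_kwargs from vllm.utils is a warn-on-use decorator. The sketch below illustrates the general pattern only; it is not vLLM's actual implementation:

    import warnings
    from functools import wraps

    # Simplified illustration of a warn-on-use kwarg decorator
    # (not vLLM's actual implementation in vllm.utils).
    def deprecate_kwargs(*names, additional_message=""):
        def decorator(fn):
            @wraps(fn)
            def wrapper(*args, **kwargs):
                for name in names:
                    if name in kwargs:
                        warnings.warn(
                            f"The keyword argument {name!r} is deprecated. "
                            f"{additional_message}",
                            DeprecationWarning,
                            stacklevel=2,
                        )
                return fn(*args, **kwargs)
            return wrapper
        return decorator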