[V0 Deprecation] Remove Prompt Adapters (#20588)
Signed-off-by: mgoin <mgoin64@gmail.com>
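For readers tracking the user-facing effect, a minimal sketch of what this change means for existing code (assuming only the EngineArgs fields shown in the diff below; the model name is a placeholder):

    from vllm.engine.arg_utils import EngineArgs

    # The flag survives only as a deprecated no-op field (see the "# DEPRECATED"
    # hunk below), so existing call sites keep constructing EngineArgs as before...
    args = EngineArgs(model="facebook/opt-125m", enable_prompt_adapter=True)

    # ...but the VllmConfig built from these args no longer receives a
    # prompt_adapter_config (see the create_engine_config() hunks below).
    vllm_config = args.create_engine_config()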
@@ -30,9 +30,9 @@ from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
                          LogprobsMode, LoRAConfig, ModelConfig, ModelDType,
                          ModelImpl, MultiModalConfig, ObservabilityConfig,
                          ParallelConfig, PoolerConfig, PrefixCachingHashAlgo,
-                         PromptAdapterConfig, SchedulerConfig, SchedulerPolicy,
-                         SpeculativeConfig, TaskOption, TokenizerMode,
-                         VllmConfig, get_attr_docs, get_field)
+                         SchedulerConfig, SchedulerPolicy, SpeculativeConfig,
+                         TaskOption, TokenizerMode, VllmConfig, get_attr_docs,
+                         get_field)
 from vllm.logger import init_logger
 from vllm.platforms import CpuArchEnum, current_platform
 from vllm.plugins import load_general_plugins
@@ -358,11 +358,6 @@ class EngineArgs:
     max_cpu_loras: Optional[int] = LoRAConfig.max_cpu_loras
     lora_dtype: Optional[Union[str, torch.dtype]] = LoRAConfig.lora_dtype
     lora_extra_vocab_size: int = LoRAConfig.lora_extra_vocab_size
-    # PromptAdapter fields
-    enable_prompt_adapter: bool = False
-    max_prompt_adapters: int = PromptAdapterConfig.max_prompt_adapters
-    max_prompt_adapter_token: int = \
-        PromptAdapterConfig.max_prompt_adapter_token

     num_scheduler_steps: int = SchedulerConfig.num_scheduler_steps
     multi_step_stream_outputs: bool = SchedulerConfig.multi_step_stream_outputs
@@ -437,6 +432,8 @@ class EngineArgs:
         ParallelConfig.enable_multimodal_encoder_data_parallel

     async_scheduling: bool = SchedulerConfig.async_scheduling
+    # DEPRECATED
+    enable_prompt_adapter: bool = False

     def __post_init__(self):
         # support `EngineArgs(compilation_config={...})`
@@ -729,23 +726,6 @@ class EngineArgs:
         lora_group.add_argument("--default-mm-loras",
                                 **lora_kwargs["default_mm_loras"])

-        # PromptAdapter related configs
-        prompt_adapter_kwargs = get_kwargs(PromptAdapterConfig)
-        prompt_adapter_group = parser.add_argument_group(
-            title="PromptAdapterConfig",
-            description=PromptAdapterConfig.__doc__,
-        )
-        prompt_adapter_group.add_argument(
-            "--enable-prompt-adapter",
-            action=argparse.BooleanOptionalAction,
-            help="If True, enable handling of PromptAdapters.")
-        prompt_adapter_group.add_argument(
-            "--max-prompt-adapters",
-            **prompt_adapter_kwargs["max_prompt_adapters"])
-        prompt_adapter_group.add_argument(
-            "--max-prompt-adapter-token",
-            **prompt_adapter_kwargs["max_prompt_adapter_token"])
-
         # Speculative arguments
         speculative_group = parser.add_argument_group(
             title="SpeculativeConfig",
@@ -850,6 +830,12 @@ class EngineArgs:
         parser.add_argument('--disable-log-stats',
                             action='store_true',
                             help='Disable logging statistics.')
+        parser.add_argument('--enable-prompt-adapter',
+                            action='store_true',
+                            deprecated=True,
+                            help='[DEPRECATED] Prompt adapter has been '
+                            'removed. Setting this flag to True or False'
+                            ' has no effect on vLLM behavior.')

         return parser

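To illustrate the retained CLI flag above, a rough stand-in using plain argparse rather than vLLM's own parser (the warning text is illustrative, not vLLM's actual message):

    import argparse
    import warnings

    parser = argparse.ArgumentParser()
    # Still accepted, so old launch scripts do not break...
    parser.add_argument('--enable-prompt-adapter', action='store_true')

    ns = parser.parse_args(['--enable-prompt-adapter'])
    if ns.enable_prompt_adapter:
        # ...but prompt adapters are gone, so the flag changes nothing.
        warnings.warn('--enable-prompt-adapter is deprecated and has no effect.',
                      DeprecationWarning)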
@@ -1234,11 +1220,6 @@ class EngineArgs:

         load_config = self.create_load_config()

-        prompt_adapter_config = PromptAdapterConfig(
-            max_prompt_adapters=self.max_prompt_adapters,
-            max_prompt_adapter_token=self.max_prompt_adapter_token) \
-            if self.enable_prompt_adapter else None
-
         decoding_config = DecodingConfig(
             backend=self.guided_decoding_backend,
             disable_fallback=self.guided_decoding_disable_fallback,
@@ -1266,7 +1247,6 @@ class EngineArgs:
             load_config=load_config,
             decoding_config=decoding_config,
             observability_config=observability_config,
-            prompt_adapter_config=prompt_adapter_config,
             compilation_config=self.compilation_config,
             kv_transfer_config=self.kv_transfer_config,
             kv_events_config=self.kv_events_config,
@@ -1342,12 +1322,6 @@ class EngineArgs:
                                recommend_to_remove=False)
             return False

-        # No Prompt Adapter so far.
-        if self.enable_prompt_adapter:
-            _raise_or_fallback(feature_name="--enable-prompt-adapter",
-                               recommend_to_remove=False)
-            return False
-
         # No text embedding inputs so far.
         if self.enable_prompt_embeds:
             _raise_or_fallback(feature_name="--enable-prompt-embeds",
@@ -1469,7 +1443,6 @@ class EngineArgs:

         if (is_gpu and not use_sliding_window and not use_spec_decode
                 and not self.enable_lora
-                and not self.enable_prompt_adapter
                 and model_config.runner_type != "pooling"):
             self.enable_chunked_prefill = True
             logger.warning(