[V0 Deprecation] Remove code related to per-request logits processors (#34400)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -26,13 +26,11 @@ from vllm.entrypoints.openai.engine.protocol import (
|
||||
FunctionCall,
|
||||
FunctionDefinition,
|
||||
LegacyStructuralTagResponseFormat,
|
||||
LogitsProcessors,
|
||||
OpenAIBaseModel,
|
||||
StreamOptions,
|
||||
StructuralTagResponseFormat,
|
||||
ToolCall,
|
||||
UsageInfo,
|
||||
get_logits_processors,
|
||||
)
|
||||
from vllm.exceptions import VLLMValidationError
|
||||
from vllm.logger import init_logger
|
||||
@@ -293,19 +291,7 @@ class ChatCompletionRequest(OpenAIBaseModel):
|
||||
"through out the inference process and return in response."
|
||||
),
|
||||
)
|
||||
logits_processors: LogitsProcessors | None = Field(
|
||||
default=None,
|
||||
description=(
|
||||
"A list of either qualified names of logits processors, or "
|
||||
"constructor objects, to apply when sampling. A constructor is "
|
||||
"a JSON object with a required 'qualname' field specifying the "
|
||||
"qualified name of the processor class/factory, and optional "
|
||||
"'args' and 'kwargs' fields containing positional and keyword "
|
||||
"arguments. For example: {'qualname': "
|
||||
"'my_module.MyLogitsProcessor', 'args': [1, 2], 'kwargs': "
|
||||
"{'param': 'value'}}."
|
||||
),
|
||||
)
|
||||
|
||||
return_tokens_as_token_ids: bool | None = Field(
|
||||
default=None,
|
||||
description=(
|
||||
@@ -324,6 +310,7 @@ class ChatCompletionRequest(OpenAIBaseModel):
|
||||
"need to map generated text back to input tokens."
|
||||
),
|
||||
)
|
||||
|
||||
cache_salt: str | None = Field(
|
||||
default=None,
|
||||
description=(
|
||||
@@ -335,6 +322,7 @@ class ChatCompletionRequest(OpenAIBaseModel):
|
||||
"to 256 bit)."
|
||||
),
|
||||
)
|
||||
|
||||
kv_transfer_params: dict[str, Any] | None = Field(
|
||||
default=None,
|
||||
description="KVTransfer parameters used for disaggregated serving.",
|
||||
@@ -417,7 +405,6 @@ class ChatCompletionRequest(OpenAIBaseModel):
|
||||
def to_sampling_params(
|
||||
self,
|
||||
max_tokens: int,
|
||||
logits_processor_pattern: str | None,
|
||||
default_sampling_params: dict,
|
||||
) -> SamplingParams:
|
||||
# Default parameters
|
||||
@@ -502,9 +489,6 @@ class ChatCompletionRequest(OpenAIBaseModel):
|
||||
min_tokens=self.min_tokens,
|
||||
skip_special_tokens=self.skip_special_tokens,
|
||||
spaces_between_special_tokens=self.spaces_between_special_tokens,
|
||||
logits_processors=get_logits_processors(
|
||||
self.logits_processors, logits_processor_pattern
|
||||
),
|
||||
include_stop_str_in_output=self.include_stop_str_in_output,
|
||||
truncate_prompt_tokens=self.truncate_prompt_tokens,
|
||||
output_kind=RequestOutputKind.DELTA
|
||||
|
||||
@@ -86,7 +86,6 @@ from vllm.tool_parsers import ToolParser
|
||||
from vllm.tool_parsers.mistral_tool_parser import MistralToolCall
|
||||
from vllm.tool_parsers.utils import partial_json_loads
|
||||
from vllm.utils.collection_utils import as_list
|
||||
from vllm.v1.sample.logits_processor import validate_logits_processors_parameters
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
@@ -130,9 +129,6 @@ class OpenAIServingChat(OpenAIServing):
|
||||
self.enable_log_outputs = enable_log_outputs
|
||||
self.enable_log_deltas = enable_log_deltas
|
||||
|
||||
# set up logits processors
|
||||
self.logits_processors = self.model_config.logits_processors
|
||||
|
||||
# set up reasoning parser
|
||||
self.reasoning_parser_cls = ParserManager.get_reasoning_parser(
|
||||
reasoning_parser_name=reasoning_parser
|
||||
@@ -403,13 +399,8 @@ class OpenAIServingChat(OpenAIServing):
|
||||
else:
|
||||
sampling_params = request.to_sampling_params(
|
||||
max_tokens,
|
||||
self.model_config.logits_processor_pattern,
|
||||
self.default_sampling_params,
|
||||
)
|
||||
validate_logits_processors_parameters(
|
||||
self.logits_processors,
|
||||
sampling_params,
|
||||
)
|
||||
|
||||
self._log_inputs(
|
||||
sub_request_id,
|
||||
|
||||
Reference in New Issue
Block a user