[V0 Deprecation] Remove code related to per-request logits processors (#34400)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2026-02-12 20:44:28 +08:00
committed by GitHub
parent f5897613fb
commit fb455ed547
12 changed files with 15 additions and 122 deletions

View File

@@ -26,13 +26,11 @@ from vllm.entrypoints.openai.engine.protocol import (
FunctionCall,
FunctionDefinition,
LegacyStructuralTagResponseFormat,
LogitsProcessors,
OpenAIBaseModel,
StreamOptions,
StructuralTagResponseFormat,
ToolCall,
UsageInfo,
get_logits_processors,
)
from vllm.exceptions import VLLMValidationError
from vllm.logger import init_logger
@@ -293,19 +291,7 @@ class ChatCompletionRequest(OpenAIBaseModel):
"through out the inference process and return in response."
),
)
logits_processors: LogitsProcessors | None = Field(
default=None,
description=(
"A list of either qualified names of logits processors, or "
"constructor objects, to apply when sampling. A constructor is "
"a JSON object with a required 'qualname' field specifying the "
"qualified name of the processor class/factory, and optional "
"'args' and 'kwargs' fields containing positional and keyword "
"arguments. For example: {'qualname': "
"'my_module.MyLogitsProcessor', 'args': [1, 2], 'kwargs': "
"{'param': 'value'}}."
),
)
return_tokens_as_token_ids: bool | None = Field(
default=None,
description=(
@@ -324,6 +310,7 @@ class ChatCompletionRequest(OpenAIBaseModel):
"need to map generated text back to input tokens."
),
)
cache_salt: str | None = Field(
default=None,
description=(
@@ -335,6 +322,7 @@ class ChatCompletionRequest(OpenAIBaseModel):
"to 256 bit)."
),
)
kv_transfer_params: dict[str, Any] | None = Field(
default=None,
description="KVTransfer parameters used for disaggregated serving.",
@@ -417,7 +405,6 @@ class ChatCompletionRequest(OpenAIBaseModel):
def to_sampling_params(
self,
max_tokens: int,
logits_processor_pattern: str | None,
default_sampling_params: dict,
) -> SamplingParams:
# Default parameters
@@ -502,9 +489,6 @@ class ChatCompletionRequest(OpenAIBaseModel):
min_tokens=self.min_tokens,
skip_special_tokens=self.skip_special_tokens,
spaces_between_special_tokens=self.spaces_between_special_tokens,
logits_processors=get_logits_processors(
self.logits_processors, logits_processor_pattern
),
include_stop_str_in_output=self.include_stop_str_in_output,
truncate_prompt_tokens=self.truncate_prompt_tokens,
output_kind=RequestOutputKind.DELTA

View File

@@ -86,7 +86,6 @@ from vllm.tool_parsers import ToolParser
from vllm.tool_parsers.mistral_tool_parser import MistralToolCall
from vllm.tool_parsers.utils import partial_json_loads
from vllm.utils.collection_utils import as_list
from vllm.v1.sample.logits_processor import validate_logits_processors_parameters
logger = init_logger(__name__)
@@ -130,9 +129,6 @@ class OpenAIServingChat(OpenAIServing):
self.enable_log_outputs = enable_log_outputs
self.enable_log_deltas = enable_log_deltas
# set up logits processors
self.logits_processors = self.model_config.logits_processors
# set up reasoning parser
self.reasoning_parser_cls = ParserManager.get_reasoning_parser(
reasoning_parser_name=reasoning_parser
@@ -403,13 +399,8 @@ class OpenAIServingChat(OpenAIServing):
else:
sampling_params = request.to_sampling_params(
max_tokens,
self.model_config.logits_processor_pattern,
self.default_sampling_params,
)
validate_logits_processors_parameters(
self.logits_processors,
sampling_params,
)
self._log_inputs(
sub_request_id,