From ce884756f062072ce7a8e36ae70b4f74e4fd79fd Mon Sep 17 00:00:00 2001 From: Chauncey Date: Tue, 31 Mar 2026 16:45:57 +0800 Subject: [PATCH] [Feature]: add presence_penalty and frequency_penalty fields to Responses API (#38613) Signed-off-by: chaunceyjiang --- vllm/entrypoints/openai/responses/protocol.py | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/vllm/entrypoints/openai/responses/protocol.py b/vllm/entrypoints/openai/responses/protocol.py index 609d8e40f..d34ba2d75 100644 --- a/vllm/entrypoints/openai/responses/protocol.py +++ b/vllm/entrypoints/openai/responses/protocol.py @@ -173,6 +173,24 @@ class ResponsesRequest(OpenAIBaseModel): user: str | None = None skip_special_tokens: bool = True include_stop_str_in_output: bool = False + presence_penalty: float | None = Field( + default=None, + ge=-2.0, + le=2.0, + description=( + "The presence penalty that was used to penalize new tokens based on " + "whether they appear in the text so far." + ), + ) + frequency_penalty: float | None = Field( + default=None, + ge=-2.0, + le=2.0, + description=( + "The frequency penalty that was used to penalize new tokens based on " + "their frequency in the text so far." + ), + ) prompt_cache_key: str | None = Field( default=None, description=( @@ -328,6 +346,12 @@ class ResponsesRequest(OpenAIBaseModel): if (repetition_penalty := self.repetition_penalty) is None: repetition_penalty = default_sampling_params.get("repetition_penalty", 1.0) + if (presence_penalty := self.presence_penalty) is None: + presence_penalty = default_sampling_params.get("presence_penalty", 0.0) + + if (frequency_penalty := self.frequency_penalty) is None: + frequency_penalty = default_sampling_params.get("frequency_penalty", 0.0) + stop_token_ids = default_sampling_params.get("stop_token_ids") # Structured output @@ -367,6 +391,8 @@ class ResponsesRequest(OpenAIBaseModel): logprobs=self.top_logprobs if self.is_include_output_logprobs() else None, stop_token_ids=stop_token_ids, stop=stop, + frequency_penalty=frequency_penalty, + presence_penalty=presence_penalty, repetition_penalty=repetition_penalty, seed=self.seed, ignore_eos=self.ignore_eos, @@ -496,6 +522,25 @@ class ResponsesResponse(OpenAIBaseModel): usage: ResponseUsage | None = None user: str | None = None + presence_penalty: float | None = Field( + default=None, + ge=-2.0, + le=2.0, + description=( + "The presence penalty that was used to penalize new tokens based on " + "whether they appear in the text so far." + ), + ) + frequency_penalty: float | None = Field( + default=None, + ge=-2.0, + le=2.0, + description=( + "The frequency penalty that was used to penalize new tokens based on " + "their frequency in the text so far." + ), + ) + # vLLM-specific fields that are not in OpenAI spec kv_transfer_params: dict[str, Any] | None = Field( default=None, description="KVTransfer parameters." @@ -574,6 +619,8 @@ class ResponsesResponse(OpenAIBaseModel): prompt=request.prompt, reasoning=request.reasoning, service_tier=request.service_tier, + presence_penalty=sampling_params.presence_penalty, + frequency_penalty=sampling_params.frequency_penalty, status=status, text=request.text, top_logprobs=sampling_params.logprobs,