[Feature]: add presence_penalty and frequency_penalty fields to Responses API (#38613)

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
This commit is contained in:
Chauncey
2026-03-31 16:45:57 +08:00
committed by GitHub
parent d9d21eb8e3
commit ce884756f0

View File

@@ -173,6 +173,24 @@ class ResponsesRequest(OpenAIBaseModel):
user: str | None = None
skip_special_tokens: bool = True
include_stop_str_in_output: bool = False
# Request-side presence penalty. Left as None when the caller does not set it;
# the sampling-parameter construction later in this class then substitutes
# default_sampling_params["presence_penalty"] (0.0 if that key is absent).
presence_penalty: float | None = Field(
    default=None,
    ge=-2.0,
    le=2.0,
    description=(
        "Penalizes new tokens based on whether they appear in the text so "
        "far. Must be between -2.0 and 2.0."
    ),
)
# Request-side frequency penalty. Left as None when the caller does not set it;
# the sampling-parameter construction later in this class then substitutes
# default_sampling_params["frequency_penalty"] (0.0 if that key is absent).
frequency_penalty: float | None = Field(
    default=None,
    ge=-2.0,
    le=2.0,
    description=(
        "Penalizes new tokens based on their frequency in the text so far. "
        "Must be between -2.0 and 2.0."
    ),
)
prompt_cache_key: str | None = Field(
default=None,
description=(
@@ -328,6 +346,12 @@ class ResponsesRequest(OpenAIBaseModel):
if (repetition_penalty := self.repetition_penalty) is None:
repetition_penalty = default_sampling_params.get("repetition_penalty", 1.0)
if (presence_penalty := self.presence_penalty) is None:
presence_penalty = default_sampling_params.get("presence_penalty", 0.0)
if (frequency_penalty := self.frequency_penalty) is None:
frequency_penalty = default_sampling_params.get("frequency_penalty", 0.0)
stop_token_ids = default_sampling_params.get("stop_token_ids")
# Structured output
@@ -367,6 +391,8 @@ class ResponsesRequest(OpenAIBaseModel):
logprobs=self.top_logprobs if self.is_include_output_logprobs() else None,
stop_token_ids=stop_token_ids,
stop=stop,
frequency_penalty=frequency_penalty,
presence_penalty=presence_penalty,
repetition_penalty=repetition_penalty,
seed=self.seed,
ignore_eos=self.ignore_eos,
@@ -496,6 +522,25 @@ class ResponsesResponse(OpenAIBaseModel):
usage: ResponseUsage | None = None
user: str | None = None
# Presence penalty echoed back on the response object. Optional (defaults to
# None) and validated to lie within [-2.0, 2.0]. The response construction
# shown further down fills it from sampling_params.presence_penalty.
presence_penalty: float | None = Field(
default=None,
ge=-2.0,
le=2.0,
description=(
"The presence penalty that was used to penalize new tokens based on "
"whether they appear in the text so far."
),
)
# Frequency penalty echoed back on the response object. Optional (defaults to
# None) and validated to lie within [-2.0, 2.0]. The response construction
# shown further down fills it from sampling_params.frequency_penalty.
frequency_penalty: float | None = Field(
default=None,
ge=-2.0,
le=2.0,
description=(
"The frequency penalty that was used to penalize new tokens based on "
"their frequency in the text so far."
),
)
# vLLM-specific fields that are not in OpenAI spec
kv_transfer_params: dict[str, Any] | None = Field(
default=None, description="KVTransfer parameters."
@@ -574,6 +619,8 @@ class ResponsesResponse(OpenAIBaseModel):
prompt=request.prompt,
reasoning=request.reasoning,
service_tier=request.service_tier,
presence_penalty=sampling_params.presence_penalty,
frequency_penalty=sampling_params.frequency_penalty,
status=status,
text=request.text,
top_logprobs=sampling_params.logprobs,