[Feature]: add presence_penalty and frequency_penalty fields to Responses API (#38613)
Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
This commit is contained in:
@@ -173,6 +173,24 @@ class ResponsesRequest(OpenAIBaseModel):
|
||||
user: str | None = None
|
||||
skip_special_tokens: bool = True
|
||||
include_stop_str_in_output: bool = False
|
||||
presence_penalty: float | None = Field(
|
||||
default=None,
|
||||
ge=-2.0,
|
||||
le=2.0,
|
||||
description=(
|
||||
"The presence penalty that was used to penalize new tokens based on "
|
||||
"whether they appear in the text so far."
|
||||
),
|
||||
)
|
||||
frequency_penalty: float | None = Field(
|
||||
default=None,
|
||||
ge=-2.0,
|
||||
le=2.0,
|
||||
description=(
|
||||
"The frequency penalty that was used to penalize new tokens based on "
|
||||
"their frequency in the text so far."
|
||||
),
|
||||
)
|
||||
prompt_cache_key: str | None = Field(
|
||||
default=None,
|
||||
description=(
|
||||
@@ -328,6 +346,12 @@ class ResponsesRequest(OpenAIBaseModel):
|
||||
if (repetition_penalty := self.repetition_penalty) is None:
|
||||
repetition_penalty = default_sampling_params.get("repetition_penalty", 1.0)
|
||||
|
||||
if (presence_penalty := self.presence_penalty) is None:
|
||||
presence_penalty = default_sampling_params.get("presence_penalty", 0.0)
|
||||
|
||||
if (frequency_penalty := self.frequency_penalty) is None:
|
||||
frequency_penalty = default_sampling_params.get("frequency_penalty", 0.0)
|
||||
|
||||
stop_token_ids = default_sampling_params.get("stop_token_ids")
|
||||
|
||||
# Structured output
|
||||
@@ -367,6 +391,8 @@ class ResponsesRequest(OpenAIBaseModel):
|
||||
logprobs=self.top_logprobs if self.is_include_output_logprobs() else None,
|
||||
stop_token_ids=stop_token_ids,
|
||||
stop=stop,
|
||||
frequency_penalty=frequency_penalty,
|
||||
presence_penalty=presence_penalty,
|
||||
repetition_penalty=repetition_penalty,
|
||||
seed=self.seed,
|
||||
ignore_eos=self.ignore_eos,
|
||||
@@ -496,6 +522,25 @@ class ResponsesResponse(OpenAIBaseModel):
|
||||
usage: ResponseUsage | None = None
|
||||
user: str | None = None
|
||||
|
||||
presence_penalty: float | None = Field(
|
||||
default=None,
|
||||
ge=-2.0,
|
||||
le=2.0,
|
||||
description=(
|
||||
"The presence penalty that was used to penalize new tokens based on "
|
||||
"whether they appear in the text so far."
|
||||
),
|
||||
)
|
||||
frequency_penalty: float | None = Field(
|
||||
default=None,
|
||||
ge=-2.0,
|
||||
le=2.0,
|
||||
description=(
|
||||
"The frequency penalty that was used to penalize new tokens based on "
|
||||
"their frequency in the text so far."
|
||||
),
|
||||
)
|
||||
|
||||
# vLLM-specific fields that are not in OpenAI spec
|
||||
kv_transfer_params: dict[str, Any] | None = Field(
|
||||
default=None, description="KVTransfer parameters."
|
||||
@@ -574,6 +619,8 @@ class ResponsesResponse(OpenAIBaseModel):
|
||||
prompt=request.prompt,
|
||||
reasoning=request.reasoning,
|
||||
service_tier=request.service_tier,
|
||||
presence_penalty=sampling_params.presence_penalty,
|
||||
frequency_penalty=sampling_params.frequency_penalty,
|
||||
status=status,
|
||||
text=request.text,
|
||||
top_logprobs=sampling_params.logprobs,
|
||||
|
||||
Reference in New Issue
Block a user