[Frontend] generation_config.json for maximum tokens(#12242)
Signed-off-by: Matthew Hendrey <matthew.hendrey@gmail.com> Signed-off-by: Shangming Cai <caishangming@linux.alibaba.com> Signed-off-by: youkaichao <youkaichao@gmail.com> Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Signed-off-by: Yuan Tang <terrytangyuan@gmail.com> Signed-off-by: Isotr0py <2037008807@qq.com> Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> Signed-off-by: Chen Zhang <zhangch99@outlook.com> Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com> Co-authored-by: shangmingc <caishangming@linux.alibaba.com> Co-authored-by: youkaichao <youkaichao@gmail.com> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: Yuan Tang <terrytangyuan@gmail.com> Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn> Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk> Co-authored-by: Chen Zhang <zhangch99@outlook.com> Co-authored-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -380,13 +380,17 @@ class ChatCompletionRequest(OpenAIBaseModel):
|
||||
) -> BeamSearchParams:
|
||||
# TODO(#9845): remove max_tokens when field is removed from OpenAI API
|
||||
max_tokens = self.max_completion_tokens or self.max_tokens
|
||||
if max_tokens is None:
|
||||
max_tokens = default_max_tokens
|
||||
|
||||
if default_sampling_params is None:
|
||||
default_sampling_params = {}
|
||||
n = self.n if self.n is not None else 1
|
||||
|
||||
# Use minimum of context window, user request & server limit.
|
||||
max_tokens = min(
|
||||
val for val in (default_max_tokens, max_tokens,
|
||||
default_sampling_params.get("max_tokens", None))
|
||||
if val is not None)
|
||||
|
||||
if (temperature := self.temperature) is None:
|
||||
temperature = default_sampling_params.get(
|
||||
"temperature", self._DEFAULT_SAMPLING_PARAMS["temperature"])
|
||||
@@ -406,11 +410,16 @@ class ChatCompletionRequest(OpenAIBaseModel):
|
||||
default_sampling_params: Optional[dict] = None) -> SamplingParams:
|
||||
# TODO(#9845): remove max_tokens when field is removed from OpenAI API
|
||||
max_tokens = self.max_completion_tokens or self.max_tokens
|
||||
if max_tokens is None:
|
||||
max_tokens = default_max_tokens
|
||||
|
||||
if default_sampling_params is None:
|
||||
default_sampling_params = {}
|
||||
|
||||
# Use minimum of context window, user request & server limit.
|
||||
max_tokens = min(
|
||||
val for val in (default_max_tokens, max_tokens,
|
||||
default_sampling_params.get("max_tokens", None))
|
||||
if val is not None)
|
||||
|
||||
# Default parameters
|
||||
if (repetition_penalty := self.repetition_penalty) is None:
|
||||
repetition_penalty = default_sampling_params.get(
|
||||
@@ -740,13 +749,17 @@ class CompletionRequest(OpenAIBaseModel):
|
||||
default_sampling_params: Optional[dict] = None
|
||||
) -> BeamSearchParams:
|
||||
max_tokens = self.max_tokens
|
||||
if max_tokens is None:
|
||||
max_tokens = default_max_tokens
|
||||
|
||||
if default_sampling_params is None:
|
||||
default_sampling_params = {}
|
||||
n = self.n if self.n is not None else 1
|
||||
|
||||
# Use minimum of context window, user request & server limit.
|
||||
max_tokens = min(
|
||||
val for val in (default_max_tokens, max_tokens,
|
||||
default_sampling_params.get("max_tokens", None))
|
||||
if val is not None)
|
||||
|
||||
if (temperature := self.temperature) is None:
|
||||
temperature = default_sampling_params.get("temperature", 1.0)
|
||||
|
||||
@@ -764,11 +777,16 @@ class CompletionRequest(OpenAIBaseModel):
|
||||
logits_processor_pattern: Optional[str],
|
||||
default_sampling_params: Optional[dict] = None) -> SamplingParams:
|
||||
max_tokens = self.max_tokens
|
||||
if max_tokens is None:
|
||||
max_tokens = default_max_tokens
|
||||
|
||||
if default_sampling_params is None:
|
||||
default_sampling_params = {}
|
||||
|
||||
# Use minimum of context window, user request & server limit.
|
||||
max_tokens = min(
|
||||
val for val in (default_max_tokens, max_tokens,
|
||||
default_sampling_params.get("max_tokens", None))
|
||||
if val is not None)
|
||||
|
||||
# Default parameters
|
||||
if (repetition_penalty := self.repetition_penalty) is None:
|
||||
repetition_penalty = default_sampling_params.get(
|
||||
|
||||
Reference in New Issue
Block a user