[Frontend] Added support for HF's new continue_final_message parameter (#8942)
This commit is contained in:
@@ -211,6 +211,15 @@ class ChatCompletionRequest(OpenAIBaseModel):
|
||||
"This is a parameter used by chat template in tokenizer config of the "
|
||||
"model."),
|
||||
)
|
||||
continue_final_message: bool = Field(
|
||||
default=False,
|
||||
description=
|
||||
("If this is set, the chat will be formatted so that the final "
|
||||
"message in the chat is open-ended, without any EOS tokens. The "
|
||||
"model will continue this message rather than starting a new one. "
|
||||
"This allows you to \"prefill\" part of the model's response for it. "
|
||||
"Cannot be used at the same time as `add_generation_prompt`."),
|
||||
)
|
||||
add_special_tokens: bool = Field(
|
||||
default=False,
|
||||
description=(
|
||||
@@ -431,6 +440,15 @@ class ChatCompletionRequest(OpenAIBaseModel):
|
||||
" of the specified `tools`")
|
||||
return data
|
||||
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
def check_generation_prompt(cls, data):
|
||||
if data.get("continue_final_message") and data.get(
|
||||
"add_generation_prompt"):
|
||||
raise ValueError("Cannot set both `continue_final_message` and "
|
||||
"`add_generation_prompt` to True.")
|
||||
return data
|
||||
|
||||
|
||||
class CompletionRequest(OpenAIBaseModel):
|
||||
# Ordered by official OpenAI API documentation
|
||||
@@ -862,8 +880,18 @@ class TokenizeChatRequest(OpenAIBaseModel):
|
||||
messages: List[ChatCompletionMessageParam]
|
||||
|
||||
add_generation_prompt: bool = Field(default=True)
|
||||
continue_final_message: bool = Field(default=False)
|
||||
add_special_tokens: bool = Field(default=False)
|
||||
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
def check_generation_prompt(cls, data):
|
||||
if data.get("continue_final_message") and data.get(
|
||||
"add_generation_prompt"):
|
||||
raise ValueError("Cannot set both `continue_final_message` and "
|
||||
"`add_generation_prompt` to True.")
|
||||
return data
|
||||
|
||||
|
||||
TokenizeRequest = Union[TokenizeCompletionRequest, TokenizeChatRequest]
|
||||
|
||||
|
||||
@@ -140,6 +140,7 @@ class OpenAIServingChat(OpenAIServing):
|
||||
messages=request.messages,
|
||||
chat_template=request.chat_template or self.chat_template,
|
||||
add_generation_prompt=request.add_generation_prompt,
|
||||
continue_final_message=request.continue_final_message,
|
||||
tools=tool_dicts,
|
||||
documents=request.documents,
|
||||
**(request.chat_template_kwargs or {}),
|
||||
@@ -150,6 +151,7 @@ class OpenAIServingChat(OpenAIServing):
|
||||
conversation=conversation,
|
||||
chat_template=request.chat_template or self.chat_template,
|
||||
add_generation_prompt=request.add_generation_prompt,
|
||||
continue_final_message=request.continue_final_message,
|
||||
tools=tool_dicts,
|
||||
documents=request.documents,
|
||||
**(request.chat_template_kwargs or {}),
|
||||
@@ -361,7 +363,7 @@ class OpenAIServingChat(OpenAIServing):
|
||||
|
||||
# Send response to echo the input portion of the
|
||||
# last message
|
||||
if request.echo:
|
||||
if request.echo or request.continue_final_message:
|
||||
last_msg_content: str = ""
|
||||
if conversation and "content" in conversation[
|
||||
-1] and conversation[-1].get("role") == role:
|
||||
@@ -716,7 +718,7 @@ class OpenAIServingChat(OpenAIServing):
|
||||
stop_reason=output.stop_reason)
|
||||
choices.append(choice_data)
|
||||
|
||||
if request.echo:
|
||||
if request.echo or request.continue_final_message:
|
||||
last_msg_content = ""
|
||||
if conversation and "content" in conversation[-1] and conversation[
|
||||
-1].get("role") == role:
|
||||
|
||||
@@ -87,6 +87,7 @@ class OpenAIServingTokenization(OpenAIServing):
|
||||
messages=request.messages,
|
||||
chat_template=self.chat_template,
|
||||
add_generation_prompt=request.add_generation_prompt,
|
||||
continue_final_message=request.continue_final_message,
|
||||
)
|
||||
else:
|
||||
prompt = apply_hf_chat_template(
|
||||
@@ -94,6 +95,7 @@ class OpenAIServingTokenization(OpenAIServing):
|
||||
conversation=conversation,
|
||||
chat_template=self.chat_template,
|
||||
add_generation_prompt=request.add_generation_prompt,
|
||||
continue_final_message=request.continue_final_message,
|
||||
)
|
||||
else:
|
||||
prompt = request.prompt
|
||||
|
||||
Reference in New Issue
Block a user