[Frontend] Added support for HF's new continue_final_message parameter (#8942)

commit 6c9ba48fde (parent 1fb9c1b0bf)
Author: danieljannai21
Date:   2024-09-29 20:59:47 +03:00
Committed by: GitHub
7 changed files with 102 additions and 28 deletions

@@ -501,6 +501,7 @@ class LLM:
         lora_request: Optional[LoRARequest] = None,
         chat_template: Optional[str] = None,
         add_generation_prompt: bool = True,
+        continue_final_message: bool = False,
         tools: Optional[List[Dict[str, Any]]] = None,
     ) -> List[RequestOutput]:
         """
@@ -528,6 +529,9 @@
                 If not provided, the model's default chat template will be used.
             add_generation_prompt: If True, adds a generation template
                 to each message.
+            continue_final_message: If True, continues the final message in
+                the conversation instead of starting a new one. Cannot be
+                ``True`` if ``add_generation_prompt`` is also ``True``.

         Returns:
             A list of ``RequestOutput`` objects containing the generated
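
The docstring hunk above fixes the semantics: with the flag set, the final (typically assistant) message is left open and generation continues inside it rather than starting a new turn. A minimal usage sketch through LLM.chat follows; the model name, messages, and sampling settings are illustrative, not from the commit:

from vllm import LLM, SamplingParams

llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct")  # illustrative model

messages = [
    {"role": "user", "content": "List the first three primes."},
    # Partial assistant turn that decoding should pick up mid-message.
    {"role": "assistant", "content": "Sure! The first three primes are 2,"},
]

outputs = llm.chat(
    messages,
    SamplingParams(max_tokens=32),
    # Mutually exclusive with the new flag, per the docstring above.
    add_generation_prompt=False,
    continue_final_message=True,
)
print(outputs[0].outputs[0].text)
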
@@ -559,6 +563,7 @@
                 messages=msgs,
                 chat_template=chat_template,
                 add_generation_prompt=add_generation_prompt,
+                continue_final_message=continue_final_message,
                 tools=tools,
             )
         else:
@@ -567,6 +572,7 @@
                 conversation=conversation,
                 chat_template=chat_template,
                 add_generation_prompt=add_generation_prompt,
+                continue_final_message=continue_final_message,
                 tools=tools,
             )
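
The flag is forwarded into both chat-template paths above and mirrors the same-named argument on Hugging Face's apply_chat_template, per the commit title. A sketch of the tokenizer-level behavior, assuming a transformers release recent enough to accept the parameter (model name again illustrative):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")

messages = [
    {"role": "user", "content": "List the first three primes."},
    {"role": "assistant", "content": "Sure! The first three primes are 2,"},
]

# With continue_final_message=True the template leaves the final message
# open (no end-of-turn token), so generation resumes inside it.
prompt = tok.apply_chat_template(
    messages,
    tokenize=False,
    continue_final_message=True,
)
print(prompt)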