[Feature] OpenAI-Compatible Tools API + Streaming for Hermes & Mistral models (#5649)
Co-authored-by: constellate <constellate@1-ai-appserver-staging.codereach.com> Co-authored-by: Kyle Mistele <kyle@constellate.ai>
This commit is contained in:
@@ -1,23 +1,28 @@
|
||||
import asyncio
|
||||
import codecs
|
||||
import json
|
||||
from abc import ABC, abstractmethod
|
||||
from collections import defaultdict
|
||||
from functools import lru_cache
|
||||
from functools import lru_cache, partial
|
||||
from pathlib import Path
|
||||
from typing import (Any, Awaitable, Dict, Generic, Iterable, List, Literal,
|
||||
Mapping, Optional, Tuple, TypeVar, Union)
|
||||
Mapping, Optional, Tuple, TypeVar, Union, cast)
|
||||
|
||||
# yapf conflicts with isort for this block
|
||||
# yapf: disable
|
||||
from openai.types.chat import ChatCompletionContentPartImageParam
|
||||
from openai.types.chat import (ChatCompletionAssistantMessageParam,
|
||||
ChatCompletionContentPartImageParam)
|
||||
from openai.types.chat import (
|
||||
ChatCompletionContentPartParam as OpenAIChatCompletionContentPartParam)
|
||||
from openai.types.chat import ChatCompletionContentPartTextParam
|
||||
from openai.types.chat import (ChatCompletionContentPartRefusalParam,
|
||||
ChatCompletionContentPartTextParam)
|
||||
from openai.types.chat import (
|
||||
ChatCompletionMessageParam as OpenAIChatCompletionMessageParam)
|
||||
from openai.types.chat import (ChatCompletionMessageToolCallParam,
|
||||
ChatCompletionToolMessageParam)
|
||||
# yapf: enable
|
||||
# pydantic needs the TypedDict from typing_extensions
|
||||
from pydantic import ConfigDict, TypeAdapter
|
||||
from pydantic import ConfigDict
|
||||
from typing_extensions import Required, TypeAlias, TypedDict
|
||||
|
||||
from vllm.config import ModelConfig
|
||||
@@ -54,7 +59,8 @@ class CustomChatCompletionContentPartParam(TypedDict, total=False):
|
||||
|
||||
ChatCompletionContentPartParam: TypeAlias = Union[
|
||||
OpenAIChatCompletionContentPartParam, ChatCompletionContentPartAudioParam,
|
||||
CustomChatCompletionContentPartParam, ]
|
||||
ChatCompletionContentPartRefusalParam,
|
||||
CustomChatCompletionContentPartParam]
|
||||
|
||||
|
||||
class CustomChatCompletionMessageParam(TypedDict, total=False):
|
||||
@@ -72,15 +78,33 @@ class CustomChatCompletionMessageParam(TypedDict, total=False):
|
||||
same role.
|
||||
"""
|
||||
|
||||
tool_call_id: Optional[str]
|
||||
"""Tool call that this message is responding to."""
|
||||
|
||||
tool_calls: Optional[Iterable[ChatCompletionMessageToolCallParam]]
|
||||
"""The tool calls generated by the model, such as function calls."""
|
||||
|
||||
|
||||
ChatCompletionMessageParam = Union[OpenAIChatCompletionMessageParam,
|
||||
CustomChatCompletionMessageParam]
|
||||
|
||||
|
||||
# TODO: Make fields ReadOnly once mypy supports it
|
||||
class ConversationMessage(TypedDict):
|
||||
role: str
|
||||
content: str
|
||||
class ConversationMessage(TypedDict, total=False):
|
||||
role: Required[str]
|
||||
"""The role of the message's author."""
|
||||
|
||||
content: Optional[str]
|
||||
"""The contents of the message"""
|
||||
|
||||
tool_call_id: Optional[str]
|
||||
"""Tool call that this message is responding to."""
|
||||
|
||||
name: Optional[str]
|
||||
"""The name of the function to call"""
|
||||
|
||||
tool_calls: Optional[Iterable[ChatCompletionMessageToolCallParam]]
|
||||
"""The tool calls generated by the model, such as function calls."""
|
||||
|
||||
|
||||
ModalityStr = Literal["image", "audio"]
|
||||
@@ -319,9 +343,11 @@ def _get_full_multimodal_text_prompt(placeholder_counts: Dict[str, int],
|
||||
return "\n".join(missing_placeholders + [text_prompt])
|
||||
|
||||
|
||||
_TextParser = TypeAdapter(ChatCompletionContentPartTextParam)
|
||||
_ImageParser = TypeAdapter(ChatCompletionContentPartImageParam)
|
||||
_AudioParser = TypeAdapter(ChatCompletionContentPartAudioParam)
|
||||
# No need to validate using Pydantic again
|
||||
_TextParser = partial(cast, ChatCompletionContentPartTextParam)
|
||||
_ImageParser = partial(cast, ChatCompletionContentPartImageParam)
|
||||
_AudioParser = partial(cast, ChatCompletionContentPartAudioParam)
|
||||
_RefusalParser = partial(cast, ChatCompletionContentPartRefusalParam)
|
||||
|
||||
|
||||
def _parse_chat_message_content_parts(
|
||||
@@ -336,10 +362,10 @@ def _parse_chat_message_content_parts(
|
||||
for part in parts:
|
||||
part_type = part["type"]
|
||||
if part_type == "text":
|
||||
text = _TextParser.validate_python(part)["text"]
|
||||
text = _TextParser(part)["text"]
|
||||
texts.append(text)
|
||||
elif part_type == "image_url":
|
||||
image_url = _ImageParser.validate_python(part)["image_url"]
|
||||
image_url = _ImageParser(part)["image_url"]
|
||||
|
||||
if image_url.get("detail", "auto") != "auto":
|
||||
logger.warning(
|
||||
@@ -348,7 +374,7 @@ def _parse_chat_message_content_parts(
|
||||
|
||||
mm_parser.parse_image(image_url["url"])
|
||||
elif part_type == "audio_url":
|
||||
audio_url = _AudioParser.validate_python(part)["audio_url"]
|
||||
audio_url = _AudioParser(part)["audio_url"]
|
||||
|
||||
mm_parser.parse_audio(audio_url["url"])
|
||||
else:
|
||||
@@ -363,6 +389,11 @@ def _parse_chat_message_content_parts(
|
||||
return [ConversationMessage(role=role, content=text_prompt)]
|
||||
|
||||
|
||||
# No need to validate using Pydantic again
|
||||
_AssistantParser = partial(cast, ChatCompletionAssistantMessageParam)
|
||||
_ToolParser = partial(cast, ChatCompletionToolMessageParam)
|
||||
|
||||
|
||||
def _parse_chat_message_content(
|
||||
message: ChatCompletionMessageParam,
|
||||
mm_tracker: BaseMultiModalItemTracker,
|
||||
@@ -371,16 +402,34 @@ def _parse_chat_message_content(
|
||||
content = message.get("content")
|
||||
|
||||
if content is None:
|
||||
return []
|
||||
if isinstance(content, str):
|
||||
return [ConversationMessage(role=role, content=content)]
|
||||
content = []
|
||||
elif isinstance(content, str):
|
||||
content = [
|
||||
ChatCompletionContentPartTextParam(type="text", text=content)
|
||||
]
|
||||
|
||||
return _parse_chat_message_content_parts(
|
||||
result = _parse_chat_message_content_parts(
|
||||
role,
|
||||
content, # type: ignore
|
||||
mm_tracker,
|
||||
)
|
||||
|
||||
for result_msg in result:
|
||||
if role == 'assistant':
|
||||
parsed_msg = _AssistantParser(message)
|
||||
|
||||
if "tool_calls" in parsed_msg:
|
||||
result_msg["tool_calls"] = list(parsed_msg["tool_calls"])
|
||||
elif role == "tool":
|
||||
parsed_msg = _ToolParser(message)
|
||||
if "tool_call_id" in parsed_msg:
|
||||
result_msg["tool_call_id"] = parsed_msg["tool_call_id"]
|
||||
|
||||
if "name" in message and isinstance(message["name"], str):
|
||||
result_msg["name"] = message["name"]
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def parse_chat_messages(
|
||||
messages: List[ChatCompletionMessageParam],
|
||||
@@ -428,6 +477,20 @@ def apply_chat_template(
|
||||
"allowed, so you must provide a chat template if the tokenizer "
|
||||
"does not define one.")
|
||||
|
||||
# Per the Transformers docs & maintainers, tool call arguments in
|
||||
# assistant-role messages with tool_calls need to be dicts, not JSON
|
||||
# strings; this is how tool-use chat templates will expect them going
|
||||
# forward. So, for messages that have tool_calls, parse the arguments
|
||||
# string (which we get in the OpenAI format) into a dict.
|
||||
for message in conversation:
|
||||
if (message["role"] == "assistant" and "tool_calls" in message
|
||||
and isinstance(message["tool_calls"], list)):
|
||||
|
||||
for i in range(len(message["tool_calls"])):
|
||||
args: str = message["tool_calls"][i]["function"]["arguments"]
|
||||
parsed_args: Dict = json.loads(args)
|
||||
message["tool_calls"][i]["function"]["arguments"] = parsed_args
|
||||
|
||||
prompt = tokenizer.apply_chat_template(
|
||||
conversation=conversation,
|
||||
chat_template=chat_template,
|
||||
|
||||
Reference in New Issue
Block a user