[Frontend] Multimodal support in offline chat (#8098)

This commit is contained in:
Cyrus Leung
2024-09-04 13:22:17 +08:00
committed by GitHub
parent 2be8ec6e71
commit 855c262a6b
8 changed files with 356 additions and 112 deletions

View File

@@ -11,7 +11,7 @@ from vllm.engine.protocol import AsyncEngineClient
from vllm.entrypoints.chat_utils import (ConversationMessage,
apply_chat_template,
load_chat_template,
-parse_chat_messages)
+parse_chat_messages_futures)
from vllm.entrypoints.logger import RequestLogger
from vllm.entrypoints.openai.protocol import (
ChatCompletionLogProb, ChatCompletionLogProbs,
@@ -26,7 +26,6 @@ from vllm.entrypoints.openai.serving_engine import (LoRAModulePath,
TextTokensPrompt)
from vllm.inputs import TokensPrompt
from vllm.logger import init_logger
-from vllm.multimodal import MultiModalDataDict
from vllm.outputs import RequestOutput
from vllm.sequence import Logprob
from vllm.tracing import (contains_trace_headers, extract_trace_headers,
@@ -94,7 +93,7 @@ class OpenAIServingChat(OpenAIServing):
tokenizer = await self.async_engine_client.get_tokenizer(
lora_request)
-conversation, mm_data_future = parse_chat_messages(
+conversation, mm_data_future = parse_chat_messages_futures(
request.messages, model_config, tokenizer)
tool_dicts = None if request.tools is None else [
@@ -114,10 +113,8 @@ class OpenAIServingChat(OpenAIServing):
logger.error("Error in applying chat template from request: %s", e)
return self.create_error_response(str(e))
-mm_data: Optional[MultiModalDataDict] = None
try:
-if mm_data_future:
-mm_data = await mm_data_future
+mm_data = await mm_data_future
except Exception as e:
logger.error("Error in loading multi-modal data: %s", e)
return self.create_error_response(str(e))