[Frontend] Multimodal support in offline chat (#8098)
```diff
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -11,7 +11,7 @@ from vllm.engine.protocol import AsyncEngineClient
 from vllm.entrypoints.chat_utils import (ConversationMessage,
                                          apply_chat_template,
                                          load_chat_template,
-                                         parse_chat_messages)
+                                         parse_chat_messages_futures)
 from vllm.entrypoints.logger import RequestLogger
 from vllm.entrypoints.openai.protocol import (
     ChatCompletionLogProb, ChatCompletionLogProbs,
@@ -26,7 +26,6 @@ from vllm.entrypoints.openai.serving_engine import (LoRAModulePath,
                                                     TextTokensPrompt)
 from vllm.inputs import TokensPrompt
 from vllm.logger import init_logger
-from vllm.multimodal import MultiModalDataDict
 from vllm.outputs import RequestOutput
 from vllm.sequence import Logprob
 from vllm.tracing import (contains_trace_headers, extract_trace_headers,
@@ -94,7 +93,7 @@ class OpenAIServingChat(OpenAIServing):
         tokenizer = await self.async_engine_client.get_tokenizer(
             lora_request)
 
-        conversation, mm_data_future = parse_chat_messages(
+        conversation, mm_data_future = parse_chat_messages_futures(
             request.messages, model_config, tokenizer)
 
         tool_dicts = None if request.tools is None else [
@@ -114,10 +113,8 @@ class OpenAIServingChat(OpenAIServing):
             logger.error("Error in applying chat template from request: %s", e)
             return self.create_error_response(str(e))
 
-        mm_data: Optional[MultiModalDataDict] = None
         try:
-            if mm_data_future:
-                mm_data = await mm_data_future
+            mm_data = await mm_data_future
         except Exception as e:
             logger.error("Error in loading multi-modal data: %s", e)
             return self.create_error_response(str(e))
```
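In short, the server-side chat handler now calls `parse_chat_messages_futures`, which returns the parsed conversation together with an awaitable that resolves to the request's multi-modal data, and the handler awaits it unconditionally instead of checking an optional future. Below is a minimal sketch of that consumption pattern; the `build_engine_prompt` coroutine and its `request`/`model_config` arguments are illustrative stand-ins (assuming a Hugging Face tokenizer), not code from this commit:

```python
from typing import Optional

from vllm.entrypoints.chat_utils import parse_chat_messages_futures
from vllm.inputs import TokensPrompt
from vllm.multimodal import MultiModalDataDict


async def build_engine_prompt(request, model_config, tokenizer):
    # Message parsing itself is synchronous; fetching any referenced media
    # (e.g. image URLs) is deferred into the returned future.
    conversation, mm_data_future = parse_chat_messages_futures(
        request.messages, model_config, tokenizer)

    prompt_ids = tokenizer.apply_chat_template(
        conversation, add_generation_prompt=True)

    # Awaiting the future performs the actual asynchronous media loading,
    # so downloads can overlap with other work instead of blocking parsing.
    mm_data: Optional[MultiModalDataDict] = await mm_data_future

    engine_prompt = TokensPrompt(prompt_token_ids=prompt_ids)
    if mm_data is not None:
        engine_prompt["multi_modal_data"] = mm_data
    return engine_prompt
```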
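The commit title refers to the offline entry point: with this change, the same OpenAI-style multi-modal message format is also accepted by `LLM.chat()` in offline inference, where the media is loaded synchronously. A rough usage sketch; the model name and image URL are placeholders:

```python
from vllm import LLM

# Any multi-modal model works here; llava is used only as an example.
llm = LLM(model="llava-hf/llava-1.5-7b-hf")

outputs = llm.chat([{
    "role": "user",
    "content": [
        {"type": "text", "text": "What is in this image?"},
        # Placeholder URL; in the offline path it is fetched eagerly.
        {"type": "image_url",
         "image_url": {"url": "https://example.com/cat.jpg"}},
    ],
}])
print(outputs[0].outputs[0].text)
```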