[Bugfix][Refactor] Unify model management in frontend (#11660)

Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
This commit is contained in:
Joe Runde
2024-12-31 18:21:51 -08:00
committed by GitHub
parent 0c6f998554
commit 4db72e57f6
15 changed files with 365 additions and 307 deletions

View File

@@ -20,7 +20,8 @@ from vllm.entrypoints.openai.protocol import (BatchRequestInput,
 # yapf: enable
 from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
 from vllm.entrypoints.openai.serving_embedding import OpenAIServingEmbedding
-from vllm.entrypoints.openai.serving_engine import BaseModelPath
+from vllm.entrypoints.openai.serving_models import (BaseModelPath,
+                                                    OpenAIServingModels)
 from vllm.usage.usage_lib import UsageContext
 from vllm.utils import FlexibleArgumentParser, random_uuid
 from vllm.version import __version__ as VLLM_VERSION
@@ -213,13 +214,17 @@ async def main(args):
     request_logger = RequestLogger(max_log_len=args.max_log_len)
     # Create the openai serving objects.
+    openai_serving_models = OpenAIServingModels(
+        model_config=model_config,
+        base_model_paths=base_model_paths,
+        lora_modules=None,
+        prompt_adapters=None,
+    )
     openai_serving_chat = OpenAIServingChat(
         engine,
         model_config,
-        base_model_paths,
+        openai_serving_models,
         args.response_role,
-        lora_modules=None,
-        prompt_adapters=None,
         request_logger=request_logger,
         chat_template=None,
         chat_template_content_format="auto",
@@ -228,7 +233,7 @@ async def main(args):
     openai_serving_embedding = OpenAIServingEmbedding(
         engine,
         model_config,
-        base_model_paths,
+        openai_serving_models,
         request_logger=request_logger,
         chat_template=None,
         chat_template_content_format="auto",