[Model][2/N] Improve all pooling task | Support multi-vector retrieval (#25370)

Signed-off-by: wang.yuqi <noooop@126.com>
This commit is contained in:
wang.yuqi
2025-10-15 19:14:41 +08:00
committed by GitHub
parent d4d1a6024f
commit f54f85129e
41 changed files with 786 additions and 399 deletions

View File

@@ -1748,16 +1748,19 @@ async def init_app_state(
 else None
 )
 state.openai_serving_pooling = (
-OpenAIServingPooling(
-engine_client,
-state.openai_serving_models,
-request_logger=request_logger,
-chat_template=resolved_chat_template,
-chat_template_content_format=args.chat_template_content_format,
-trust_request_chat_template=args.trust_request_chat_template,
-log_error_stack=args.log_error_stack,
+(
+OpenAIServingPooling(
+engine_client,
+state.openai_serving_models,
+supported_tasks=supported_tasks,
+request_logger=request_logger,
+chat_template=resolved_chat_template,
+chat_template_content_format=args.chat_template_content_format,
+trust_request_chat_template=args.trust_request_chat_template,
+log_error_stack=args.log_error_stack,
+)
 )
-if "encode" in supported_tasks
+if ("token_embed" in supported_tasks or "token_classify" in supported_tasks)
 else None
 )
 state.openai_serving_embedding = (