[Frontend] Support scores endpoint in run_batch (#12430)

Signed-off-by: Pooya Davoodi <pooya.davoodi@parasail.io>
This commit is contained in:
Pooya Davoodi
2025-01-26 20:30:17 -08:00
committed by GitHub
parent 28e0750847
commit 0cc6b383d7
4 changed files with 99 additions and 7 deletions

View File

@@ -16,12 +16,14 @@ from vllm.entrypoints.openai.protocol import (BatchRequestInput,
BatchRequestOutput,
BatchResponseData,
ChatCompletionResponse,
EmbeddingResponse, ErrorResponse)
EmbeddingResponse, ErrorResponse,
ScoreResponse)
# yapf: enable
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
from vllm.entrypoints.openai.serving_embedding import OpenAIServingEmbedding
from vllm.entrypoints.openai.serving_models import (BaseModelPath,
OpenAIServingModels)
from vllm.entrypoints.openai.serving_score import OpenAIServingScores
from vllm.usage.usage_lib import UsageContext
from vllm.utils import FlexibleArgumentParser, random_uuid
from vllm.version import __version__ as VLLM_VERSION
@@ -167,7 +169,8 @@ async def run_request(serving_engine_func: Callable,
tracker: BatchProgressTracker) -> BatchRequestOutput:
response = await serving_engine_func(request.body)
if isinstance(response, (ChatCompletionResponse, EmbeddingResponse)):
if isinstance(response,
(ChatCompletionResponse, EmbeddingResponse, ScoreResponse)):
batch_output = BatchRequestOutput(
id=f"vllm-{random_uuid()}",
custom_id=request.custom_id,
@@ -239,6 +242,12 @@ async def main(args):
chat_template=None,
chat_template_content_format="auto",
) if model_config.task == "embed" else None
openai_serving_scores = (OpenAIServingScores(
engine,
model_config,
openai_serving_models,
request_logger=request_logger,
) if model_config.task == "score" else None)
tracker = BatchProgressTracker()
logger.info("Reading batch from %s...", args.input_file)
@@ -279,14 +288,28 @@ async def main(args):
))
continue
response_futures.append(run_request(handler_fn, request, tracker))
tracker.submitted()
elif request.url == "/v1/score":
handler_fn = (None if openai_serving_scores is None else
openai_serving_scores.create_score)
if handler_fn is None:
response_futures.append(
make_async_error_request_output(
request,
error_msg="The model does not support Scores API",
))
continue
response_futures.append(run_request(handler_fn, request, tracker))
tracker.submitted()
else:
response_futures.append(
make_async_error_request_output(
request,
error_msg="Only /v1/chat/completions and "
"/v1/embeddings are supported in the batch endpoint.",
error_msg=
"Only /v1/chat/completions, /v1/embeddings, and /v1/score "
"are supported in the batch endpoint.",
))
with tracker.pbar():