[Frontend] Create ErrorResponse instead of raising exceptions in run_batch (#8347)

This commit is contained in:
Pooya Davoodi
2024-09-10 22:30:11 -07:00
committed by GitHub
parent 6a512a00df
commit cea95dfb94
2 changed files with 31 additions and 4 deletions

View File

@@ -1,4 +1,5 @@
import asyncio
from http import HTTPStatus
from io import StringIO
from typing import Awaitable, Callable, List, Optional
@@ -135,6 +136,25 @@ async def write_file(path_or_url: str, data: str) -> None:
f.write(data)
def make_error_request_output(request: BatchRequestInput,
                              error_msg: str) -> BatchRequestOutput:
    """Build a failed batch output for ``request`` carrying ``error_msg``.

    The returned output echoes the request's ``custom_id``, receives freshly
    generated ``vllm-`` and ``vllm-batch-`` identifiers, and reports
    ``HTTPStatus.BAD_REQUEST`` as the response status code. ``error_msg`` is
    stored unchanged in the output's ``error`` field.
    """
    # Values are computed in the same order as the constructor arguments:
    # output id, custom id, then the nested response (with its request id).
    output_id = f"vllm-{random_uuid()}"
    custom_id = request.custom_id
    error_response = BatchResponseData(
        status_code=HTTPStatus.BAD_REQUEST,
        request_id=f"vllm-batch-{random_uuid()}",
    )
    return BatchRequestOutput(
        id=output_id,
        custom_id=custom_id,
        response=error_response,
        error=error_msg,
    )
async def make_async_error_request_output(
        request: BatchRequestInput, error_msg: str) -> BatchRequestOutput:
    """Coroutine wrapper around ``make_error_request_output``.

    Performs no awaiting of its own; it simply builds the error output for
    ``request`` with ``error_msg`` and returns it, so the result can be
    awaited like any other coroutine.
    """
    error_output = make_error_request_output(request, error_msg)
    return error_output
async def run_request(serving_engine_func: Callable,
request: BatchRequestInput,
tracker: BatchProgressTracker) -> BatchRequestOutput:
@@ -158,7 +178,8 @@ async def run_request(serving_engine_func: Callable,
error=response,
)
else:
raise ValueError("Request must not be sent in stream mode")
batch_output = make_error_request_output(
request, error_msg="Request must not be sent in stream mode")
tracker.completed()
return batch_output
@@ -225,8 +246,12 @@ async def main(args):
tracker))
tracker.submitted()
else:
raise ValueError("Only /v1/chat/completions and /v1/embeddings are"
"supported in the batch endpoint.")
response_futures.append(
make_async_error_request_output(
request,
error_msg="Only /v1/chat/completions and "
"/v1/embeddings are supported in the batch endpoint.",
))
with tracker.pbar():
responses = await asyncio.gather(*response_futures)