diff --git a/vllm/entrypoints/openai/chat_completion/api_router.py b/vllm/entrypoints/openai/chat_completion/api_router.py index d3576ab24..81af0af3d 100644 --- a/vllm/entrypoints/openai/chat_completion/api_router.py +++ b/vllm/entrypoints/openai/chat_completion/api_router.py @@ -57,7 +57,7 @@ async def create_chat_completion(request: ChatCompletionRequest, raw_request: Re try: generator = await handler.create_chat_completion(request, raw_request) except Exception as e: - return handler.create_error_response(e) + generator = handler.create_error_response(e) if isinstance(generator, ErrorResponse): return JSONResponse( @@ -96,7 +96,7 @@ async def render_chat_completion(request: ChatCompletionRequest, raw_request: Re try: result = await handler.render_chat_request(request) except Exception as e: - return handler.create_error_response(e) + result = handler.create_error_response(e) if isinstance(result, ErrorResponse): return JSONResponse(content=result.model_dump(), status_code=result.error.code) diff --git a/vllm/entrypoints/openai/completion/api_router.py b/vllm/entrypoints/openai/completion/api_router.py index f064a0a77..04dfdbccb 100644 --- a/vllm/entrypoints/openai/completion/api_router.py +++ b/vllm/entrypoints/openai/completion/api_router.py @@ -57,7 +57,7 @@ async def create_completion(request: CompletionRequest, raw_request: Request): try: generator = await handler.create_completion(request, raw_request) except Exception as e: - return handler.create_error_response(e) + generator = handler.create_error_response(e) if isinstance(generator, ErrorResponse): return JSONResponse( @@ -94,7 +94,7 @@ async def render_completion(request: CompletionRequest, raw_request: Request): try: result = await handler.render_completion_request(request) except Exception as e: - return handler.create_error_response(e) + result = handler.create_error_response(e) if isinstance(result, ErrorResponse): return JSONResponse(content=result.model_dump(), status_code=result.error.code) diff --git a/vllm/entrypoints/openai/responses/api_router.py b/vllm/entrypoints/openai/responses/api_router.py index 2be69999e..62328c045 100644 --- a/vllm/entrypoints/openai/responses/api_router.py +++ b/vllm/entrypoints/openai/responses/api_router.py @@ -66,7 +66,7 @@ async def create_responses(request: ResponsesRequest, raw_request: Request): try: generator = await handler.create_responses(request, raw_request) except Exception as e: - return handler.create_error_response(e) + generator = handler.create_error_response(e) if isinstance(generator, ErrorResponse): return JSONResponse( @@ -102,7 +102,7 @@ async def retrieve_responses( stream=stream, ) except Exception as e: - return handler.create_error_response(e) + response = handler.create_error_response(e) if isinstance(response, ErrorResponse): return JSONResponse( @@ -128,7 +128,7 @@ async def cancel_responses(response_id: str, raw_request: Request): try: response = await handler.cancel_responses(response_id) except Exception as e: - return handler.create_error_response(e) + response = handler.create_error_response(e) if isinstance(response, ErrorResponse): return JSONResponse( diff --git a/vllm/entrypoints/pooling/classify/api_router.py b/vllm/entrypoints/pooling/classify/api_router.py index f4afec7fe..8a1513ebc 100644 --- a/vllm/entrypoints/pooling/classify/api_router.py +++ b/vllm/entrypoints/pooling/classify/api_router.py @@ -35,7 +35,7 @@ async def create_classify(request: ClassificationRequest, raw_request: Request): try: generator = await handler.create_classify(request, raw_request) except Exception as e: - return handler.create_error_response(e) + generator = handler.create_error_response(e) if isinstance(generator, ErrorResponse): return JSONResponse( diff --git a/vllm/entrypoints/pooling/embed/api_router.py b/vllm/entrypoints/pooling/embed/api_router.py index c252bb43c..f77c07069 100644 --- a/vllm/entrypoints/pooling/embed/api_router.py +++ b/vllm/entrypoints/pooling/embed/api_router.py @@ -64,7 +64,7 @@ async def create_embedding( try: generator = await handler.create_embedding(request, raw_request) except Exception as e: - return handler.create_error_response(e) + generator = handler.create_error_response(e) if isinstance(generator, ErrorResponse): return JSONResponse( diff --git a/vllm/entrypoints/pooling/pooling/api_router.py b/vllm/entrypoints/pooling/pooling/api_router.py index bfff97daa..6084e724d 100644 --- a/vllm/entrypoints/pooling/pooling/api_router.py +++ b/vllm/entrypoints/pooling/pooling/api_router.py @@ -44,7 +44,7 @@ async def create_pooling(request: PoolingRequest, raw_request: Request): try: generator = await handler.create_pooling(request, raw_request) except Exception as e: - return handler.create_error_response(e) + generator = handler.create_error_response(e) if isinstance(generator, ErrorResponse): return JSONResponse( diff --git a/vllm/entrypoints/pooling/score/api_router.py b/vllm/entrypoints/pooling/score/api_router.py index 006403239..ef64ba45e 100644 --- a/vllm/entrypoints/pooling/score/api_router.py +++ b/vllm/entrypoints/pooling/score/api_router.py @@ -52,7 +52,7 @@ async def create_score(request: ScoreRequest, raw_request: Request): try: generator = await handler.create_score(request, raw_request) except Exception as e: - return handler.create_error_response(e) + generator = handler.create_error_response(e) if isinstance(generator, ErrorResponse): return JSONResponse( @@ -103,7 +103,7 @@ async def do_rerank(request: RerankRequest, raw_request: Request): try: generator = await handler.do_rerank(request, raw_request) except Exception as e: - return handler.create_error_response(e) + generator = handler.create_error_response(e) if isinstance(generator, ErrorResponse): return JSONResponse( diff --git a/vllm/entrypoints/serve/disagg/api_router.py b/vllm/entrypoints/serve/disagg/api_router.py index 08542ec5e..9966ba47b 100644 --- a/vllm/entrypoints/serve/disagg/api_router.py +++ b/vllm/entrypoints/serve/disagg/api_router.py @@ -67,7 +67,7 @@ async def generate(request: GenerateRequest, raw_request: Request): try: generator = await handler.serve_tokens(request, raw_request) except Exception as e: - return handler.create_error_response(e) + generator = handler.create_error_response(e) if isinstance(generator, ErrorResponse): return JSONResponse( diff --git a/vllm/entrypoints/serve/tokenize/api_router.py b/vllm/entrypoints/serve/tokenize/api_router.py index 66d34ef11..333acbca1 100644 --- a/vllm/entrypoints/serve/tokenize/api_router.py +++ b/vllm/entrypoints/serve/tokenize/api_router.py @@ -52,7 +52,7 @@ async def tokenize(request: TokenizeRequest, raw_request: Request): try: generator = await handler.create_tokenize(request, raw_request) except Exception as e: - return handler.create_error_response(e) + generator = handler.create_error_response(e) if isinstance(generator, ErrorResponse): return JSONResponse(