Add Anthropic /v1/messages endpoint to the OpenAI api_server (#27882)

Signed-off-by: bbartels <benjamin@bartels.dev>
Signed-off-by: Benjamin Bartels <benjamin@bartels.dev>
This commit is contained in:
Benjamin Bartels
2025-11-01 19:45:42 +00:00
committed by GitHub
parent c2ed069b32
commit 1e88fb751b
5 changed files with 139 additions and 462 deletions

View File

@@ -41,6 +41,13 @@ import vllm.envs as envs
from vllm.config import VllmConfig
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.protocol import Device, EngineClient
from vllm.entrypoints.anthropic.protocol import (
AnthropicError,
AnthropicErrorResponse,
AnthropicMessagesRequest,
AnthropicMessagesResponse,
)
from vllm.entrypoints.anthropic.serving_messages import AnthropicServingMessages
from vllm.entrypoints.launcher import serve_http
from vllm.entrypoints.logger import RequestLogger
from vllm.entrypoints.openai.cli_args import make_arg_parser, validate_parsed_serve_args
@@ -308,6 +315,10 @@ def responses(request: Request) -> OpenAIServingResponses | None:
return request.app.state.openai_serving_responses
def messages(request: Request) -> AnthropicServingMessages | None:
    """Return the Anthropic Messages handler stored on the app state.

    The handler is only constructed when "generate" is among the supported
    tasks (see init_app_state), so this is None otherwise — callers such as
    create_messages must handle the None case. The annotation previously
    claimed the handler is always present, which contradicted both the
    None check in create_messages and the sibling accessors
    (responses/chat), which are annotated as optional.
    """
    return request.app.state.anthropic_serving_messages
def chat(request: Request) -> OpenAIServingChat | None:
    """Look up the chat-completions handler on the application state."""
    app_state = request.app.state
    return app_state.openai_serving_chat
@@ -591,6 +602,63 @@ async def cancel_responses(response_id: str, raw_request: Request):
return JSONResponse(content=response.model_dump())
@router.post(
    "/v1/messages",
    dependencies=[Depends(validate_json_request)],
    responses={
        HTTPStatus.OK.value: {"content": {"text/event-stream": {}}},
        HTTPStatus.BAD_REQUEST.value: {"model": AnthropicErrorResponse},
        HTTPStatus.NOT_FOUND.value: {"model": AnthropicErrorResponse},
        HTTPStatus.INTERNAL_SERVER_ERROR.value: {"model": AnthropicErrorResponse},
    },
)
@with_cancellation
@load_aware_call
async def create_messages(request: AnthropicMessagesRequest, raw_request: Request):
    """Serve the Anthropic-style /v1/messages API.

    Delegates to the AnthropicServingMessages handler and converts any
    OpenAI-style ErrorResponse into the Anthropic error envelope. Returns
    a JSONResponse for non-streaming results and a StreamingResponse
    (server-sent events) otherwise.
    """

    def as_anthropic_error(err: ErrorResponse) -> JSONResponse:
        # Repackage an OpenAI ErrorResponse in Anthropic's error shape,
        # preserving the original HTTP status code.
        payload = AnthropicErrorResponse(
            error=AnthropicError(
                type=err.error.type,
                message=err.error.message,
            )
        )
        return JSONResponse(
            status_code=err.error.code, content=payload.model_dump()
        )

    handler = messages(raw_request)
    if handler is None:
        # No Anthropic Messages handler was configured for this deployment.
        error = base(raw_request).create_error_response(
            message="The model does not support Messages API"
        )
        return as_anthropic_error(error)

    try:
        result = await handler.create_messages(request, raw_request)
    except Exception as e:
        logger.exception("Error in create_messages: %s", e)
        # NOTE(review): str(e) is echoed back to the client here, which may
        # expose internal details — confirm this matches the server's policy.
        body = AnthropicErrorResponse(
            error=AnthropicError(
                type="internal_error",
                message=str(e),
            )
        )
        return JSONResponse(
            status_code=HTTPStatus.INTERNAL_SERVER_ERROR.value,
            content=body.model_dump(),
        )

    if isinstance(result, ErrorResponse):
        return as_anthropic_error(result)
    if isinstance(result, AnthropicMessagesResponse):
        logger.debug(
            "Anthropic Messages Response: %s", result.model_dump(exclude_none=True)
        )
        return JSONResponse(content=result.model_dump(exclude_none=True))

    # Anything else is treated as an event stream for the streaming path.
    return StreamingResponse(content=result, media_type="text/event-stream")
@router.post(
"/v1/chat/completions",
dependencies=[Depends(validate_json_request)],
@@ -1817,6 +1885,24 @@ async def init_app_state(
if "transcription" in supported_tasks
else None
)
state.anthropic_serving_messages = (
AnthropicServingMessages(
engine_client,
state.openai_serving_models,
args.response_role,
request_logger=request_logger,
chat_template=resolved_chat_template,
chat_template_content_format=args.chat_template_content_format,
return_tokens_as_token_ids=args.return_tokens_as_token_ids,
enable_auto_tools=args.enable_auto_tool_choice,
tool_parser=args.tool_call_parser,
reasoning_parser=args.structured_outputs_config.reasoning_parser,
enable_prompt_tokens_details=args.enable_prompt_tokens_details,
enable_force_include_usage=args.enable_force_include_usage,
)
if "generate" in supported_tasks
else None
)
state.enable_server_load_tracking = args.enable_server_load_tracking
state.server_load_metrics = 0