diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index e78cdd7f8..19fc79f61 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -132,7 +132,7 @@ steps: - tests/entrypoints/ commands: - pytest -v -s entrypoints/openai/tool_parsers - - pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/rpc --ignore=entrypoints/sleep --ignore=entrypoints/instrumentator --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling + - pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/rpc --ignore=entrypoints/instrumentator --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling - label: Entrypoints Integration Test (LLM) # 30min timeout_in_minutes: 40 @@ -179,14 +179,14 @@ steps: torch_nightly: true source_file_dependencies: - vllm/ - - tests/entrypoints/sleep - tests/entrypoints/rpc + - tests/entrypoints/instrumentator - tests/tool_use commands: - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - pytest -v -s entrypoints/sleep + - pytest -v -s entrypoints/instrumentator + - PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc - pytest -v -s tool_use - - PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc - label: Entrypoints Integration Test (Pooling) timeout_in_minutes: 50 diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 73d4cf80c..74e0d19e0 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -118,7 +118,7 @@ steps: - tests/entrypoints/ commands: - pytest -v -s entrypoints/openai/tool_parsers - - pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/rpc --ignore=entrypoints/sleep --ignore=entrypoints/instrumentator --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling + - pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/rpc --ignore=entrypoints/instrumentator --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling - label: Entrypoints Integration Test (LLM) # 30min timeout_in_minutes: 40 @@ -148,7 +148,7 @@ steps: - tests/entrypoints/test_chat_utils commands: - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses + - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/instrumentator --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses - pytest -v -s entrypoints/test_chat_utils.py - label: Entrypoints Integration Test (API Server 2) @@ -159,13 +159,13 @@ steps: torch_nightly: true source_file_dependencies: - vllm/ - - tests/entrypoints/sleep - tests/entrypoints/rpc + - tests/entrypoints/instrumentator - tests/tool_use commands: - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - pytest -v -s entrypoints/sleep - - PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc + - pytest -v -s entrypoints/instrumentator + - PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc - pytest -v -s tool_use - label: Entrypoints Integration Test (Pooling) diff --git a/.buildkite/test_areas/entrypoints.yaml b/.buildkite/test_areas/entrypoints.yaml index 8e02d9f60..0c72e3d9b 100644 --- a/.buildkite/test_areas/entrypoints.yaml +++ b/.buildkite/test_areas/entrypoints.yaml @@ -42,15 +42,13 @@ steps: working_dir: "/vllm-workspace/tests" source_file_dependencies: - vllm/ - - tests/tool_use - - tests/entrypoints/sleep - - tests/entrypoints/instrumentator - tests/entrypoints/rpc + - tests/entrypoints/instrumentator + - tests/tool_use commands: - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc - pytest -v -s entrypoints/instrumentator - - pytest -v -s entrypoints/sleep + - PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc - pytest -v -s tool_use - label: Entrypoints Integration (Pooling) diff --git a/tests/entrypoints/openai/test_basic.py b/tests/entrypoints/instrumentator/test_basic.py similarity index 100% rename from tests/entrypoints/openai/test_basic.py rename to tests/entrypoints/instrumentator/test_basic.py diff --git a/tests/entrypoints/openai/test_optional_middleware.py b/tests/entrypoints/instrumentator/test_optional_middleware.py similarity index 100% rename from tests/entrypoints/openai/test_optional_middleware.py rename to tests/entrypoints/instrumentator/test_optional_middleware.py diff --git a/tests/entrypoints/openai/test_orca_metrics.py b/tests/entrypoints/instrumentator/test_orca_metrics.py similarity index 100% rename from tests/entrypoints/openai/test_orca_metrics.py rename to tests/entrypoints/instrumentator/test_orca_metrics.py diff --git a/tests/entrypoints/sleep/test_sleep.py b/tests/entrypoints/instrumentator/test_sleep.py similarity index 100% rename from tests/entrypoints/sleep/test_sleep.py rename to tests/entrypoints/instrumentator/test_sleep.py diff --git a/tests/entrypoints/sleep/__init__.py b/tests/entrypoints/sleep/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 1ce706abc..d76a7446d 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -178,10 +178,6 @@ def build_app( app = FastAPI(lifespan=lifespan) app.state.args = args - from vllm.entrypoints.openai.basic.api_router import register_basic_api_routers - - register_basic_api_routers(app) - from vllm.entrypoints.serve import register_vllm_serve_api_routers register_vllm_serve_api_routers(app) @@ -205,6 +201,24 @@ def build_app( register_generate_api_routers(app) + from vllm.entrypoints.serve.disagg.api_router import ( + attach_router as attach_disagg_router, + ) + + attach_disagg_router(app) + + from vllm.entrypoints.serve.rlhf.api_router import ( + attach_router as attach_rlhf_router, + ) + + attach_rlhf_router(app) + + from vllm.entrypoints.serve.elastic_ep.api_router import ( + attach_router as elastic_ep_attach_router, + ) + + elastic_ep_attach_router(app) + if "transcription" in supported_tasks: from vllm.entrypoints.openai.speech_to_text.api_router import ( attach_router as register_speech_to_text_api_router, diff --git a/vllm/entrypoints/openai/basic/__init__.py b/vllm/entrypoints/openai/basic/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/vllm/entrypoints/sagemaker/api_router.py b/vllm/entrypoints/sagemaker/api_router.py index 7c5bae5b5..1138225c3 100644 --- a/vllm/entrypoints/sagemaker/api_router.py +++ b/vllm/entrypoints/sagemaker/api_router.py @@ -10,10 +10,10 @@ import pydantic from fastapi import APIRouter, Depends, FastAPI, HTTPException, Request from fastapi.responses import JSONResponse, Response -from vllm.entrypoints.openai.basic.api_router import base from vllm.entrypoints.openai.engine.protocol import ErrorResponse from vllm.entrypoints.openai.engine.serving import OpenAIServing from vllm.entrypoints.openai.utils import validate_json_request +from vllm.entrypoints.serve.instrumentator.basic import base from vllm.entrypoints.serve.instrumentator.health import health from vllm.tasks import POOLING_TASKS, SupportedTask diff --git a/vllm/entrypoints/serve/__init__.py b/vllm/entrypoints/serve/__init__.py index f5c80f682..8233d3324 100644 --- a/vllm/entrypoints/serve/__init__.py +++ b/vllm/entrypoints/serve/__init__.py @@ -22,12 +22,6 @@ def register_vllm_serve_api_routers(app: FastAPI): attach_lora_router(app) - from vllm.entrypoints.serve.elastic_ep.api_router import ( - attach_router as attach_elastic_ep_router, - ) - - attach_elastic_ep_router(app) - from vllm.entrypoints.serve.profile.api_router import ( attach_router as attach_profile_router, ) @@ -58,37 +52,6 @@ def register_vllm_serve_api_routers(app: FastAPI): attach_tokenize_router(app) - from vllm.entrypoints.serve.disagg.api_router import ( - attach_router as attach_disagg_router, - ) + from .instrumentator import register_instrumentator_api_routers - attach_disagg_router(app) - - from vllm.entrypoints.serve.rlhf.api_router import ( - attach_router as attach_rlhf_router, - ) - - attach_rlhf_router(app) - - from vllm.entrypoints.serve.instrumentator.metrics import ( - attach_router as attach_metrics_router, - ) - - attach_metrics_router(app) - - from vllm.entrypoints.serve.instrumentator.health import ( - attach_router as attach_health_router, - ) - - attach_health_router(app) - - from vllm.entrypoints.serve.instrumentator.offline_docs import ( - attach_router as attach_offline_docs_router, - ) - - attach_offline_docs_router(app) - from vllm.entrypoints.serve.instrumentator.server_info import ( - attach_router as attach_server_info_router, - ) - - attach_server_info_router(app) + register_instrumentator_api_routers(app) diff --git a/vllm/entrypoints/serve/instrumentator/__init__.py b/vllm/entrypoints/serve/instrumentator/__init__.py index e69de29bb..8abce0232 100644 --- a/vllm/entrypoints/serve/instrumentator/__init__.py +++ b/vllm/entrypoints/serve/instrumentator/__init__.py @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +from fastapi import FastAPI + +from vllm import envs + + +def register_instrumentator_api_routers(app: FastAPI): + from .basic import router as basic_router + + app.include_router(basic_router) + + from .health import router as health_router + + app.include_router(health_router) + + from .metrics import attach_router as metrics_attach_router + + metrics_attach_router(app) + + from .offline_docs import attach_router as offline_docs_attach_router + + offline_docs_attach_router(app) + + if envs.VLLM_SERVER_DEV_MODE: + from .server_info import router as server_info_router + + app.include_router(server_info_router) diff --git a/vllm/entrypoints/openai/basic/api_router.py b/vllm/entrypoints/serve/instrumentator/basic.py similarity index 92% rename from vllm/entrypoints/openai/basic/api_router.py rename to vllm/entrypoints/serve/instrumentator/basic.py index 3378d914a..e6c96de0b 100644 --- a/vllm/entrypoints/openai/basic/api_router.py +++ b/vllm/entrypoints/serve/instrumentator/basic.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from fastapi import APIRouter, FastAPI, Request +from fastapi import APIRouter, Request from fastapi.responses import JSONResponse from vllm.engine.protocol import EngineClient @@ -55,7 +55,3 @@ async def get_server_load_metrics(request: Request): async def show_version(): ver = {"version": VLLM_VERSION} return JSONResponse(content=ver) - - -def register_basic_api_routers(app: FastAPI): - app.include_router(router) diff --git a/vllm/entrypoints/serve/instrumentator/health.py b/vllm/entrypoints/serve/instrumentator/health.py index 029ef677a..8b079ce31 100644 --- a/vllm/entrypoints/serve/instrumentator/health.py +++ b/vllm/entrypoints/serve/instrumentator/health.py @@ -27,7 +27,3 @@ async def health(raw_request: Request) -> Response: return Response(status_code=200) except EngineDeadError: return Response(status_code=503) - - -def attach_router(app): - app.include_router(router) diff --git a/vllm/entrypoints/serve/instrumentator/server_info.py b/vllm/entrypoints/serve/instrumentator/server_info.py index d6ef994f3..60967c5a6 100644 --- a/vllm/entrypoints/serve/instrumentator/server_info.py +++ b/vllm/entrypoints/serve/instrumentator/server_info.py @@ -7,7 +7,7 @@ import functools from typing import Annotated, Literal import pydantic -from fastapi import APIRouter, FastAPI, Query, Request +from fastapi import APIRouter, Query, Request from fastapi.responses import JSONResponse import vllm.envs as envs @@ -57,9 +57,3 @@ async def show_server_info( "system_env": await asyncio.to_thread(_get_system_env_info_cached), } return JSONResponse(content=server_info) - - -def attach_router(app: FastAPI): - if not envs.VLLM_SERVER_DEV_MODE: - return - app.include_router(router)