[Frontend][CI] Consolidate instrumentator entrypoints (#34123)

Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>
This commit is contained in:
wang.yuqi
2026-02-10 15:30:19 +08:00
committed by GitHub
parent 8d48d0a9d9
commit dab1de9f38
16 changed files with 64 additions and 74 deletions

View File

@@ -132,7 +132,7 @@ steps:
- tests/entrypoints/
commands:
- pytest -v -s entrypoints/openai/tool_parsers
- pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/rpc --ignore=entrypoints/sleep --ignore=entrypoints/instrumentator --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling
- pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/rpc --ignore=entrypoints/instrumentator --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling
- label: Entrypoints Integration Test (LLM) # 30min
timeout_in_minutes: 40
@@ -179,14 +179,14 @@ steps:
torch_nightly: true
source_file_dependencies:
- vllm/
- tests/entrypoints/sleep
- tests/entrypoints/rpc
- tests/entrypoints/instrumentator
- tests/tool_use
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -v -s entrypoints/sleep
- pytest -v -s entrypoints/instrumentator
- PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
- pytest -v -s tool_use
- PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
- label: Entrypoints Integration Test (Pooling)
timeout_in_minutes: 50

View File

@@ -118,7 +118,7 @@ steps:
- tests/entrypoints/
commands:
- pytest -v -s entrypoints/openai/tool_parsers
- pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/rpc --ignore=entrypoints/sleep --ignore=entrypoints/instrumentator --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling
- pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/rpc --ignore=entrypoints/instrumentator --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling
- label: Entrypoints Integration Test (LLM) # 30min
timeout_in_minutes: 40
@@ -148,7 +148,7 @@ steps:
- tests/entrypoints/test_chat_utils
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/instrumentator --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
- pytest -v -s entrypoints/test_chat_utils.py
- label: Entrypoints Integration Test (API Server 2)
@@ -159,13 +159,13 @@ steps:
torch_nightly: true
source_file_dependencies:
- vllm/
- tests/entrypoints/sleep
- tests/entrypoints/rpc
- tests/entrypoints/instrumentator
- tests/tool_use
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- pytest -v -s entrypoints/sleep
- PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
- pytest -v -s entrypoints/instrumentator
- PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
- pytest -v -s tool_use
- label: Entrypoints Integration Test (Pooling)

View File

@@ -42,15 +42,13 @@ steps:
working_dir: "/vllm-workspace/tests"
source_file_dependencies:
- vllm/
- tests/tool_use
- tests/entrypoints/sleep
- tests/entrypoints/instrumentator
- tests/entrypoints/rpc
- tests/entrypoints/instrumentator
- tests/tool_use
commands:
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
- PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
- pytest -v -s entrypoints/instrumentator
- pytest -v -s entrypoints/sleep
- PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
- pytest -v -s tool_use
- label: Entrypoints Integration (Pooling)

View File

@@ -178,10 +178,6 @@ def build_app(
app = FastAPI(lifespan=lifespan)
app.state.args = args
from vllm.entrypoints.openai.basic.api_router import register_basic_api_routers
register_basic_api_routers(app)
from vllm.entrypoints.serve import register_vllm_serve_api_routers
register_vllm_serve_api_routers(app)
@@ -205,6 +201,24 @@ def build_app(
register_generate_api_routers(app)
from vllm.entrypoints.serve.disagg.api_router import (
attach_router as attach_disagg_router,
)
attach_disagg_router(app)
from vllm.entrypoints.serve.rlhf.api_router import (
attach_router as attach_rlhf_router,
)
attach_rlhf_router(app)
from vllm.entrypoints.serve.elastic_ep.api_router import (
attach_router as elastic_ep_attach_router,
)
elastic_ep_attach_router(app)
if "transcription" in supported_tasks:
from vllm.entrypoints.openai.speech_to_text.api_router import (
attach_router as register_speech_to_text_api_router,

View File

@@ -10,10 +10,10 @@ import pydantic
from fastapi import APIRouter, Depends, FastAPI, HTTPException, Request
from fastapi.responses import JSONResponse, Response
from vllm.entrypoints.openai.basic.api_router import base
from vllm.entrypoints.openai.engine.protocol import ErrorResponse
from vllm.entrypoints.openai.engine.serving import OpenAIServing
from vllm.entrypoints.openai.utils import validate_json_request
from vllm.entrypoints.serve.instrumentator.basic import base
from vllm.entrypoints.serve.instrumentator.health import health
from vllm.tasks import POOLING_TASKS, SupportedTask

View File

@@ -22,12 +22,6 @@ def register_vllm_serve_api_routers(app: FastAPI):
attach_lora_router(app)
from vllm.entrypoints.serve.elastic_ep.api_router import (
attach_router as attach_elastic_ep_router,
)
attach_elastic_ep_router(app)
from vllm.entrypoints.serve.profile.api_router import (
attach_router as attach_profile_router,
)
@@ -58,37 +52,6 @@ def register_vllm_serve_api_routers(app: FastAPI):
attach_tokenize_router(app)
from vllm.entrypoints.serve.disagg.api_router import (
attach_router as attach_disagg_router,
)
from .instrumentator import register_instrumentator_api_routers
attach_disagg_router(app)
from vllm.entrypoints.serve.rlhf.api_router import (
attach_router as attach_rlhf_router,
)
attach_rlhf_router(app)
from vllm.entrypoints.serve.instrumentator.metrics import (
attach_router as attach_metrics_router,
)
attach_metrics_router(app)
from vllm.entrypoints.serve.instrumentator.health import (
attach_router as attach_health_router,
)
attach_health_router(app)
from vllm.entrypoints.serve.instrumentator.offline_docs import (
attach_router as attach_offline_docs_router,
)
attach_offline_docs_router(app)
from vllm.entrypoints.serve.instrumentator.server_info import (
attach_router as attach_server_info_router,
)
attach_server_info_router(app)
register_instrumentator_api_routers(app)

View File

@@ -0,0 +1,29 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from fastapi import FastAPI
from vllm import envs
def register_instrumentator_api_routers(app: FastAPI):
    """Register the instrumentator routers on ``app``.

    The ``basic`` and ``health`` routers are included directly, while the
    ``metrics`` and ``offline_docs`` modules are wired in through their own
    ``attach_router`` helpers. The ``server_info`` router is included only
    when ``envs.VLLM_SERVER_DEV_MODE`` is truthy. Every submodule is
    imported lazily, immediately before it is used.
    """
    from .basic import router as basic_api_router

    app.include_router(basic_api_router)

    from .health import router as health_api_router

    app.include_router(health_api_router)

    from .metrics import attach_router as attach_metrics

    attach_metrics(app)

    from .offline_docs import attach_router as attach_offline_docs

    attach_offline_docs(app)

    # Outside dev mode the server-info module is neither imported nor
    # registered.
    if not envs.VLLM_SERVER_DEV_MODE:
        return

    from .server_info import router as server_info_api_router

    app.include_router(server_info_api_router)

View File

@@ -1,7 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from fastapi import APIRouter, FastAPI, Request
from fastapi import APIRouter, Request
from fastapi.responses import JSONResponse
from vllm.engine.protocol import EngineClient
@@ -55,7 +55,3 @@ async def get_server_load_metrics(request: Request):
async def show_version():
ver = {"version": VLLM_VERSION}
return JSONResponse(content=ver)
def register_basic_api_routers(app: FastAPI):
    """Include this module's ``router`` in the given FastAPI ``app``."""
    app.include_router(router)

View File

@@ -27,7 +27,3 @@ async def health(raw_request: Request) -> Response:
return Response(status_code=200)
except EngineDeadError:
return Response(status_code=503)
def attach_router(app):
    """Include this module's ``router`` in ``app``."""
    app.include_router(router)

View File

@@ -7,7 +7,7 @@ import functools
from typing import Annotated, Literal
import pydantic
from fastapi import APIRouter, FastAPI, Query, Request
from fastapi import APIRouter, Query, Request
from fastapi.responses import JSONResponse
import vllm.envs as envs
@@ -57,9 +57,3 @@ async def show_server_info(
"system_env": await asyncio.to_thread(_get_system_env_info_cached),
}
return JSONResponse(content=server_info)
def attach_router(app: FastAPI):
    """Include this module's ``router`` in ``app`` when dev mode is on.

    Nothing is registered unless ``envs.VLLM_SERVER_DEV_MODE`` is truthy.
    """
    if envs.VLLM_SERVER_DEV_MODE:
        app.include_router(router)