[Frontend][CI] Consolidate instrumentator entrypoints (#34123)
Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>
This commit is contained in:
@@ -132,7 +132,7 @@ steps:
|
||||
- tests/entrypoints/
|
||||
commands:
|
||||
- pytest -v -s entrypoints/openai/tool_parsers
|
||||
- pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/rpc --ignore=entrypoints/sleep --ignore=entrypoints/instrumentator --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling
|
||||
- pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/rpc --ignore=entrypoints/instrumentator --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling
|
||||
|
||||
- label: Entrypoints Integration Test (LLM) # 30min
|
||||
timeout_in_minutes: 40
|
||||
@@ -179,14 +179,14 @@ steps:
|
||||
torch_nightly: true
|
||||
source_file_dependencies:
|
||||
- vllm/
|
||||
- tests/entrypoints/sleep
|
||||
- tests/entrypoints/rpc
|
||||
- tests/entrypoints/instrumentator
|
||||
- tests/tool_use
|
||||
commands:
|
||||
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
|
||||
- pytest -v -s entrypoints/sleep
|
||||
- pytest -v -s entrypoints/instrumentator
|
||||
- PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
|
||||
- pytest -v -s tool_use
|
||||
- PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
|
||||
|
||||
- label: Entrypoints Integration Test (Pooling)
|
||||
timeout_in_minutes: 50
|
||||
|
||||
@@ -118,7 +118,7 @@ steps:
|
||||
- tests/entrypoints/
|
||||
commands:
|
||||
- pytest -v -s entrypoints/openai/tool_parsers
|
||||
- pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/rpc --ignore=entrypoints/sleep --ignore=entrypoints/instrumentator --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling
|
||||
- pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/rpc --ignore=entrypoints/instrumentator --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling
|
||||
|
||||
- label: Entrypoints Integration Test (LLM) # 30min
|
||||
timeout_in_minutes: 40
|
||||
@@ -148,7 +148,7 @@ steps:
|
||||
- tests/entrypoints/test_chat_utils
|
||||
commands:
|
||||
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
|
||||
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
|
||||
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/instrumentator --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
|
||||
- pytest -v -s entrypoints/test_chat_utils.py
|
||||
|
||||
- label: Entrypoints Integration Test (API Server 2)
|
||||
@@ -159,13 +159,13 @@ steps:
|
||||
torch_nightly: true
|
||||
source_file_dependencies:
|
||||
- vllm/
|
||||
- tests/entrypoints/sleep
|
||||
- tests/entrypoints/rpc
|
||||
- tests/entrypoints/instrumentator
|
||||
- tests/tool_use
|
||||
commands:
|
||||
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
|
||||
- pytest -v -s entrypoints/sleep
|
||||
- PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
|
||||
- pytest -v -s entrypoints/instrumentator
|
||||
- PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
|
||||
- pytest -v -s tool_use
|
||||
|
||||
- label: Entrypoints Integration Test (Pooling)
|
||||
|
||||
@@ -42,15 +42,13 @@ steps:
|
||||
working_dir: "/vllm-workspace/tests"
|
||||
source_file_dependencies:
|
||||
- vllm/
|
||||
- tests/tool_use
|
||||
- tests/entrypoints/sleep
|
||||
- tests/entrypoints/instrumentator
|
||||
- tests/entrypoints/rpc
|
||||
- tests/entrypoints/instrumentator
|
||||
- tests/tool_use
|
||||
commands:
|
||||
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
|
||||
- PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
|
||||
- pytest -v -s entrypoints/instrumentator
|
||||
- pytest -v -s entrypoints/sleep
|
||||
- PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
|
||||
- pytest -v -s tool_use
|
||||
|
||||
- label: Entrypoints Integration (Pooling)
|
||||
|
||||
@@ -178,10 +178,6 @@ def build_app(
|
||||
app = FastAPI(lifespan=lifespan)
|
||||
app.state.args = args
|
||||
|
||||
from vllm.entrypoints.openai.basic.api_router import register_basic_api_routers
|
||||
|
||||
register_basic_api_routers(app)
|
||||
|
||||
from vllm.entrypoints.serve import register_vllm_serve_api_routers
|
||||
|
||||
register_vllm_serve_api_routers(app)
|
||||
@@ -205,6 +201,24 @@ def build_app(
|
||||
|
||||
register_generate_api_routers(app)
|
||||
|
||||
from vllm.entrypoints.serve.disagg.api_router import (
|
||||
attach_router as attach_disagg_router,
|
||||
)
|
||||
|
||||
attach_disagg_router(app)
|
||||
|
||||
from vllm.entrypoints.serve.rlhf.api_router import (
|
||||
attach_router as attach_rlhf_router,
|
||||
)
|
||||
|
||||
attach_rlhf_router(app)
|
||||
|
||||
from vllm.entrypoints.serve.elastic_ep.api_router import (
|
||||
attach_router as elastic_ep_attach_router,
|
||||
)
|
||||
|
||||
elastic_ep_attach_router(app)
|
||||
|
||||
if "transcription" in supported_tasks:
|
||||
from vllm.entrypoints.openai.speech_to_text.api_router import (
|
||||
attach_router as register_speech_to_text_api_router,
|
||||
|
||||
@@ -10,10 +10,10 @@ import pydantic
|
||||
from fastapi import APIRouter, Depends, FastAPI, HTTPException, Request
|
||||
from fastapi.responses import JSONResponse, Response
|
||||
|
||||
from vllm.entrypoints.openai.basic.api_router import base
|
||||
from vllm.entrypoints.openai.engine.protocol import ErrorResponse
|
||||
from vllm.entrypoints.openai.engine.serving import OpenAIServing
|
||||
from vllm.entrypoints.openai.utils import validate_json_request
|
||||
from vllm.entrypoints.serve.instrumentator.basic import base
|
||||
from vllm.entrypoints.serve.instrumentator.health import health
|
||||
from vllm.tasks import POOLING_TASKS, SupportedTask
|
||||
|
||||
|
||||
@@ -22,12 +22,6 @@ def register_vllm_serve_api_routers(app: FastAPI):
|
||||
|
||||
attach_lora_router(app)
|
||||
|
||||
from vllm.entrypoints.serve.elastic_ep.api_router import (
|
||||
attach_router as attach_elastic_ep_router,
|
||||
)
|
||||
|
||||
attach_elastic_ep_router(app)
|
||||
|
||||
from vllm.entrypoints.serve.profile.api_router import (
|
||||
attach_router as attach_profile_router,
|
||||
)
|
||||
@@ -58,37 +52,6 @@ def register_vllm_serve_api_routers(app: FastAPI):
|
||||
|
||||
attach_tokenize_router(app)
|
||||
|
||||
from vllm.entrypoints.serve.disagg.api_router import (
|
||||
attach_router as attach_disagg_router,
|
||||
)
|
||||
from .instrumentator import register_instrumentator_api_routers
|
||||
|
||||
attach_disagg_router(app)
|
||||
|
||||
from vllm.entrypoints.serve.rlhf.api_router import (
|
||||
attach_router as attach_rlhf_router,
|
||||
)
|
||||
|
||||
attach_rlhf_router(app)
|
||||
|
||||
from vllm.entrypoints.serve.instrumentator.metrics import (
|
||||
attach_router as attach_metrics_router,
|
||||
)
|
||||
|
||||
attach_metrics_router(app)
|
||||
|
||||
from vllm.entrypoints.serve.instrumentator.health import (
|
||||
attach_router as attach_health_router,
|
||||
)
|
||||
|
||||
attach_health_router(app)
|
||||
|
||||
from vllm.entrypoints.serve.instrumentator.offline_docs import (
|
||||
attach_router as attach_offline_docs_router,
|
||||
)
|
||||
|
||||
attach_offline_docs_router(app)
|
||||
from vllm.entrypoints.serve.instrumentator.server_info import (
|
||||
attach_router as attach_server_info_router,
|
||||
)
|
||||
|
||||
attach_server_info_router(app)
|
||||
register_instrumentator_api_routers(app)
|
||||
|
||||
@@ -0,0 +1,29 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
from fastapi import FastAPI
|
||||
|
||||
from vllm import envs
|
||||
|
||||
|
||||
def register_instrumentator_api_routers(app: FastAPI):
    """Register every instrumentator route group on ``app``.

    The basic and health routers are included directly, while the metrics
    and offline-docs modules are wired in through their own
    ``attach_router`` helpers. The server-info router is included only when
    ``envs.VLLM_SERVER_DEV_MODE`` is truthy.

    Each submodule is imported lazily, right before it is registered, and
    the registration order is: basic, health, metrics, offline docs,
    server info.

    Args:
        app: The FastAPI application that receives the routes.
    """
    from .basic import router as basic_routes

    app.include_router(basic_routes)

    from .health import router as health_routes

    app.include_router(health_routes)

    from .metrics import attach_router as attach_metrics

    attach_metrics(app)

    from .offline_docs import attach_router as attach_offline_docs

    attach_offline_docs(app)

    # Server info is a development-only endpoint; stop here otherwise.
    if not envs.VLLM_SERVER_DEV_MODE:
        return

    from .server_info import router as server_info_routes

    app.include_router(server_info_routes)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
from fastapi import APIRouter, FastAPI, Request
|
||||
from fastapi import APIRouter, Request
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from vllm.engine.protocol import EngineClient
|
||||
@@ -55,7 +55,3 @@ async def get_server_load_metrics(request: Request):
|
||||
async def show_version():
|
||||
ver = {"version": VLLM_VERSION}
|
||||
return JSONResponse(content=ver)
|
||||
|
||||
|
||||
def register_basic_api_routers(app: FastAPI):
    """Include this module's ``router`` in the given FastAPI application.

    Args:
        app: The FastAPI application that receives the routes.
    """
    app.include_router(router)
|
||||
@@ -27,7 +27,3 @@ async def health(raw_request: Request) -> Response:
|
||||
return Response(status_code=200)
|
||||
except EngineDeadError:
|
||||
return Response(status_code=503)
|
||||
|
||||
|
||||
def attach_router(app):
    """Include this module's ``router`` in ``app``.

    Args:
        app: The application object; only its ``include_router`` method is
            used here.
    """
    app.include_router(router)
|
||||
|
||||
@@ -7,7 +7,7 @@ import functools
|
||||
from typing import Annotated, Literal
|
||||
|
||||
import pydantic
|
||||
from fastapi import APIRouter, FastAPI, Query, Request
|
||||
from fastapi import APIRouter, Query, Request
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
import vllm.envs as envs
|
||||
@@ -57,9 +57,3 @@ async def show_server_info(
|
||||
"system_env": await asyncio.to_thread(_get_system_env_info_cached),
|
||||
}
|
||||
return JSONResponse(content=server_info)
|
||||
|
||||
|
||||
def attach_router(app: FastAPI):
    """Include this module's ``router`` in ``app`` in dev mode only.

    When ``envs.VLLM_SERVER_DEV_MODE`` is falsy the call is a no-op and the
    application is left untouched.

    Args:
        app: The FastAPI application that receives the routes.
    """
    if envs.VLLM_SERVER_DEV_MODE:
        app.include_router(router)
|
||||
|
||||
Reference in New Issue
Block a user