diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml
index e78cdd7f8..19fc79f61 100644
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@@ -132,7 +132,7 @@ steps:
   - tests/entrypoints/
   commands:
   - pytest -v -s entrypoints/openai/tool_parsers
-  - pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/rpc --ignore=entrypoints/sleep --ignore=entrypoints/instrumentator --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py  --ignore=entrypoints/pooling
+  - pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/rpc --ignore=entrypoints/instrumentator --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py  --ignore=entrypoints/pooling
 
 - label: Entrypoints Integration Test (LLM) # 30min
   timeout_in_minutes: 40
@@ -179,14 +179,14 @@ steps:
   torch_nightly: true
   source_file_dependencies:
   - vllm/
-  - tests/entrypoints/sleep
   - tests/entrypoints/rpc
+  - tests/entrypoints/instrumentator
   - tests/tool_use
   commands:
   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s entrypoints/sleep
+  - pytest -v -s entrypoints/instrumentator
+  - PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
   - pytest -v -s tool_use
-  - PYTHONPATH=/vllm-workspace  pytest -v -s entrypoints/rpc
 
 - label: Entrypoints Integration Test (Pooling)
   timeout_in_minutes: 50
diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 73d4cf80c..74e0d19e0 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -118,7 +118,7 @@ steps:
   - tests/entrypoints/
   commands:
   - pytest -v -s entrypoints/openai/tool_parsers
-  - pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/rpc --ignore=entrypoints/sleep --ignore=entrypoints/instrumentator --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py  --ignore=entrypoints/pooling
+  - pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/rpc --ignore=entrypoints/instrumentator --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py  --ignore=entrypoints/pooling
 
 - label: Entrypoints Integration Test (LLM) # 30min
   timeout_in_minutes: 40
@@ -148,7 +148,7 @@ steps:
   - tests/entrypoints/test_chat_utils
   commands:
   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/  --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
+  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/instrumentator --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/  --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
   - pytest -v -s entrypoints/test_chat_utils.py
 
 - label: Entrypoints Integration Test (API Server 2)
@@ -159,13 +159,13 @@ steps:
   torch_nightly: true
   source_file_dependencies:
   - vllm/
-  - tests/entrypoints/sleep
   - tests/entrypoints/rpc
+  - tests/entrypoints/instrumentator
   - tests/tool_use
   commands:
   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s entrypoints/sleep
-  - PYTHONPATH=/vllm-workspace  pytest -v -s entrypoints/rpc
+  - pytest -v -s entrypoints/instrumentator
+  - PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
   - pytest -v -s tool_use
 
 - label: Entrypoints Integration Test (Pooling)
diff --git a/.buildkite/test_areas/entrypoints.yaml b/.buildkite/test_areas/entrypoints.yaml
index 8e02d9f60..0c72e3d9b 100644
--- a/.buildkite/test_areas/entrypoints.yaml
+++ b/.buildkite/test_areas/entrypoints.yaml
@@ -42,15 +42,13 @@ steps:
   working_dir: "/vllm-workspace/tests"
   source_file_dependencies:
   - vllm/
-  - tests/tool_use
-  - tests/entrypoints/sleep
-  - tests/entrypoints/instrumentator
   - tests/entrypoints/rpc
+  - tests/entrypoints/instrumentator
+  - tests/tool_use
   commands:
   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
   - pytest -v -s entrypoints/instrumentator
-  - pytest -v -s entrypoints/sleep
+  - PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
   - pytest -v -s tool_use
 
 - label: Entrypoints Integration (Pooling)
diff --git a/tests/entrypoints/openai/test_basic.py b/tests/entrypoints/instrumentator/test_basic.py
similarity index 100%
rename from tests/entrypoints/openai/test_basic.py
rename to tests/entrypoints/instrumentator/test_basic.py
diff --git a/tests/entrypoints/openai/test_optional_middleware.py b/tests/entrypoints/instrumentator/test_optional_middleware.py
similarity index 100%
rename from tests/entrypoints/openai/test_optional_middleware.py
rename to tests/entrypoints/instrumentator/test_optional_middleware.py
diff --git a/tests/entrypoints/openai/test_orca_metrics.py b/tests/entrypoints/instrumentator/test_orca_metrics.py
similarity index 100%
rename from tests/entrypoints/openai/test_orca_metrics.py
rename to tests/entrypoints/instrumentator/test_orca_metrics.py
diff --git a/tests/entrypoints/sleep/test_sleep.py b/tests/entrypoints/instrumentator/test_sleep.py
similarity index 100%
rename from tests/entrypoints/sleep/test_sleep.py
rename to tests/entrypoints/instrumentator/test_sleep.py
diff --git a/tests/entrypoints/sleep/__init__.py b/tests/entrypoints/sleep/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index 1ce706abc..d76a7446d 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -178,10 +178,6 @@ def build_app(
         app = FastAPI(lifespan=lifespan)
     app.state.args = args
 
-    from vllm.entrypoints.openai.basic.api_router import register_basic_api_routers
-
-    register_basic_api_routers(app)
-
     from vllm.entrypoints.serve import register_vllm_serve_api_routers
 
     register_vllm_serve_api_routers(app)
@@ -205,6 +201,24 @@ def build_app(
 
         register_generate_api_routers(app)
 
+        from vllm.entrypoints.serve.disagg.api_router import (
+            attach_router as attach_disagg_router,
+        )
+
+        attach_disagg_router(app)
+
+        from vllm.entrypoints.serve.rlhf.api_router import (
+            attach_router as attach_rlhf_router,
+        )
+
+        attach_rlhf_router(app)
+
+        from vllm.entrypoints.serve.elastic_ep.api_router import (
+            attach_router as elastic_ep_attach_router,
+        )
+
+        elastic_ep_attach_router(app)
+
     if "transcription" in supported_tasks:
         from vllm.entrypoints.openai.speech_to_text.api_router import (
             attach_router as register_speech_to_text_api_router,
diff --git a/vllm/entrypoints/openai/basic/__init__.py b/vllm/entrypoints/openai/basic/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/vllm/entrypoints/sagemaker/api_router.py b/vllm/entrypoints/sagemaker/api_router.py
index 7c5bae5b5..1138225c3 100644
--- a/vllm/entrypoints/sagemaker/api_router.py
+++ b/vllm/entrypoints/sagemaker/api_router.py
@@ -10,10 +10,10 @@ import pydantic
 from fastapi import APIRouter, Depends, FastAPI, HTTPException, Request
 from fastapi.responses import JSONResponse, Response
 
-from vllm.entrypoints.openai.basic.api_router import base
 from vllm.entrypoints.openai.engine.protocol import ErrorResponse
 from vllm.entrypoints.openai.engine.serving import OpenAIServing
 from vllm.entrypoints.openai.utils import validate_json_request
+from vllm.entrypoints.serve.instrumentator.basic import base
 from vllm.entrypoints.serve.instrumentator.health import health
 from vllm.tasks import POOLING_TASKS, SupportedTask
 
diff --git a/vllm/entrypoints/serve/__init__.py b/vllm/entrypoints/serve/__init__.py
index f5c80f682..8233d3324 100644
--- a/vllm/entrypoints/serve/__init__.py
+++ b/vllm/entrypoints/serve/__init__.py
@@ -22,12 +22,6 @@ def register_vllm_serve_api_routers(app: FastAPI):
 
     attach_lora_router(app)
 
-    from vllm.entrypoints.serve.elastic_ep.api_router import (
-        attach_router as attach_elastic_ep_router,
-    )
-
-    attach_elastic_ep_router(app)
-
     from vllm.entrypoints.serve.profile.api_router import (
         attach_router as attach_profile_router,
     )
@@ -58,37 +52,6 @@ def register_vllm_serve_api_routers(app: FastAPI):
 
     attach_tokenize_router(app)
 
-    from vllm.entrypoints.serve.disagg.api_router import (
-        attach_router as attach_disagg_router,
-    )
+    from .instrumentator import register_instrumentator_api_routers
 
-    attach_disagg_router(app)
-
-    from vllm.entrypoints.serve.rlhf.api_router import (
-        attach_router as attach_rlhf_router,
-    )
-
-    attach_rlhf_router(app)
-
-    from vllm.entrypoints.serve.instrumentator.metrics import (
-        attach_router as attach_metrics_router,
-    )
-
-    attach_metrics_router(app)
-
-    from vllm.entrypoints.serve.instrumentator.health import (
-        attach_router as attach_health_router,
-    )
-
-    attach_health_router(app)
-
-    from vllm.entrypoints.serve.instrumentator.offline_docs import (
-        attach_router as attach_offline_docs_router,
-    )
-
-    attach_offline_docs_router(app)
-    from vllm.entrypoints.serve.instrumentator.server_info import (
-        attach_router as attach_server_info_router,
-    )
-
-    attach_server_info_router(app)
+    register_instrumentator_api_routers(app)
diff --git a/vllm/entrypoints/serve/instrumentator/__init__.py b/vllm/entrypoints/serve/instrumentator/__init__.py
index e69de29bb..8abce0232 100644
--- a/vllm/entrypoints/serve/instrumentator/__init__.py
+++ b/vllm/entrypoints/serve/instrumentator/__init__.py
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from fastapi import FastAPI
+
+from vllm import envs
+
+
+def register_instrumentator_api_routers(app: FastAPI):
+    """Mount instrumentator routers on ``app``; server info needs dev mode."""
+    from .basic import router as basic_routes
+
+    app.include_router(basic_routes)
+
+    from .health import router as health_routes
+
+    app.include_router(health_routes)
+
+    from .metrics import attach_router as attach_metrics
+
+    attach_metrics(app)
+
+    from .offline_docs import attach_router as attach_offline_docs
+
+    attach_offline_docs(app)
+    if envs.VLLM_SERVER_DEV_MODE:
+        from .server_info import router as server_info_routes
+
+        app.include_router(server_info_routes)
diff --git a/vllm/entrypoints/openai/basic/api_router.py b/vllm/entrypoints/serve/instrumentator/basic.py
similarity index 92%
rename from vllm/entrypoints/openai/basic/api_router.py
rename to vllm/entrypoints/serve/instrumentator/basic.py
index 3378d914a..e6c96de0b 100644
--- a/vllm/entrypoints/openai/basic/api_router.py
+++ b/vllm/entrypoints/serve/instrumentator/basic.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-from fastapi import APIRouter, FastAPI, Request
+from fastapi import APIRouter, Request
 from fastapi.responses import JSONResponse
 
 from vllm.engine.protocol import EngineClient
@@ -55,7 +55,3 @@ async def get_server_load_metrics(request: Request):
 async def show_version():
     ver = {"version": VLLM_VERSION}
     return JSONResponse(content=ver)
-
-
-def register_basic_api_routers(app: FastAPI):
-    app.include_router(router)
diff --git a/vllm/entrypoints/serve/instrumentator/health.py b/vllm/entrypoints/serve/instrumentator/health.py
index 029ef677a..8b079ce31 100644
--- a/vllm/entrypoints/serve/instrumentator/health.py
+++ b/vllm/entrypoints/serve/instrumentator/health.py
@@ -27,7 +27,3 @@ async def health(raw_request: Request) -> Response:
         return Response(status_code=200)
     except EngineDeadError:
         return Response(status_code=503)
-
-
-def attach_router(app):
-    app.include_router(router)
diff --git a/vllm/entrypoints/serve/instrumentator/server_info.py b/vllm/entrypoints/serve/instrumentator/server_info.py
index d6ef994f3..60967c5a6 100644
--- a/vllm/entrypoints/serve/instrumentator/server_info.py
+++ b/vllm/entrypoints/serve/instrumentator/server_info.py
@@ -7,7 +7,7 @@ import functools
 from typing import Annotated, Literal
 
 import pydantic
-from fastapi import APIRouter, FastAPI, Query, Request
+from fastapi import APIRouter, Query, Request
 from fastapi.responses import JSONResponse
 
 import vllm.envs as envs
@@ -57,9 +57,3 @@ async def show_server_info(
         "system_env": await asyncio.to_thread(_get_system_env_info_cached),
     }
     return JSONResponse(content=server_info)
-
-
-def attach_router(app: FastAPI):
-    if not envs.VLLM_SERVER_DEV_MODE:
-        return
-    app.include_router(router)