[Refactor] [1/N] to simplify the vLLM serving architecture (#28040)

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
2025-12-03 17:26:39 +08:00
parent 69520bc695
commit 3f42b05fbc
27 changed files with 850 additions and 455 deletions
--- a/tests/entrypoints/openai/test_basic.py
+++ b/tests/entrypoints/openai/test_basic.py
@@ -232,7 +232,7 @@ async def test_server_load(server: RemoteOpenAIServer):
@pytest.mark.asyncio
 async def test_health_check_engine_dead_error():
    # Import the health function directly to test it in isolation
-    from vllm.entrypoints.openai.api_server import health
+    from vllm.entrypoints.serve.instrumentator.health import health

    # Create a mock request that simulates what FastAPI would provide
    mock_request = Mock(spec=Request)