[Refactor] [1/N] to simplify the vLLM serving architecture (#28040)

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
This commit is contained in:
Chauncey
2025-12-03 17:26:39 +08:00
committed by GitHub
parent 69520bc695
commit 3f42b05fbc
27 changed files with 850 additions and 455 deletions

View File

@@ -232,7 +232,7 @@ async def test_server_load(server: RemoteOpenAIServer):
@pytest.mark.asyncio
async def test_health_check_engine_dead_error():
# Import the health function directly to test it in isolation
-from vllm.entrypoints.openai.api_server import health
+from vllm.entrypoints.serve.instrumentator.health import health
# Create a mock request that simulates what FastAPI would provide
mock_request = Mock(spec=Request)