[Disagg] Support large batch size in proxy server and update NixlConnector doc for DP (#28782)

Signed-off-by: Ming Yang <minos.future@gmail.com>
This commit is contained in:
Ming Yang
2025-12-08 16:01:08 -08:00
committed by GitHub
parent 1fb632fdb6
commit 60d17251c9
3 changed files with 42 additions and 4 deletions

View File

@@ -26,9 +26,21 @@ async def lifespan(app: FastAPI):
)
app.state.prefill_client = httpx.AsyncClient(
timeout=None, base_url=prefiller_base_url
timeout=None,
base_url=prefiller_base_url,
limits=httpx.Limits(
max_connections=None,
max_keepalive_connections=None,
),
)
app.state.decode_client = httpx.AsyncClient(
timeout=None,
base_url=decoder_base_url,
limits=httpx.Limits(
max_connections=None,
max_keepalive_connections=None,
),
)
app.state.decode_client = httpx.AsyncClient(timeout=None, base_url=decoder_base_url)
yield
@@ -105,6 +117,11 @@ async def send_request_to_service(
headers = {"Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}"}
response = await client.post(endpoint, json=req_data, headers=headers)
response.raise_for_status()
# read/consume the response body to release the connection;
# otherwise, it would raise httpx.ReadError
await response.aread()
return response