Add error dump logging: capture full request+response on 4xx/5xx from SGLang

This commit is contained in:
2026-04-12 19:28:04 +00:00
parent db9231f796
commit 774964a4db

View File

@@ -18,6 +18,7 @@ import json
import os import os
import asyncio import asyncio
import httpx import httpx
from datetime import datetime
from fastapi import FastAPI, Request from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse, Response from fastapi.responses import StreamingResponse, Response
import uvicorn import uvicorn
@@ -87,6 +88,47 @@ async def health():
return Response(content="SGLang not ready", status_code=503) return Response(content="SGLang not ready", status_code=503)
# Destination file for error dumps; override with the VLLM_SHIM_LOG env var.
ERROR_LOG = os.environ.get("VLLM_SHIM_LOG", "/tmp/vllm-shim.log")


def _format_payload(raw: bytes, limit: int) -> str:
    """Return *raw* as log-friendly text, capped at *limit* characters.

    The payload is pretty-printed when it parses as JSON; otherwise it is
    decoded as UTF-8 with undecodable bytes replaced.
    """
    try:
        # Parse the complete payload first: truncating before parsing would
        # turn every oversized JSON document into invalid JSON.
        text = json.dumps(json.loads(raw), indent=2, ensure_ascii=False)
    except (ValueError, RecursionError):
        # ValueError covers both JSONDecodeError and UnicodeDecodeError;
        # RecursionError guards against pathologically nested documents.
        text = raw.decode("utf-8", errors="replace")
    return text[:limit]


def _dump_error(request_body: bytes, status_code: int, resp_headers: dict, resp_body_raw: bytes, path: str = "") -> None:
    """Log full request + response payload when SGLang returns an error (4xx/5xx).

    Appends one delimited record to ``ERROR_LOG`` and prints a one-line
    notice to stdout. This is best-effort: any failure is reported with
    ``print`` and never propagated to the caller.

    Args:
        request_body: Raw body of the proxied request (written up to 8000 chars).
        status_code: HTTP status code returned by the upstream server.
        resp_headers: Upstream response headers. Accepted for the callers'
            benefit but currently not written to the log.
        resp_body_raw: Raw upstream response body (written up to 4000 chars).
        path: Request path that was proxied.
    """
    try:
        ts = datetime.now().isoformat()
        rule = "=" * 60
        # Assemble the record up front so it reaches the file in one write.
        record = "".join((
            f"\n{rule}\n",
            f"[{ts}] ERROR DUMP — SGLang returned HTTP {status_code}\n",
            f"Path: {path}\n",
            "--- Request Body ---\n",
            _format_payload(request_body, 8000),
            f"\n--- Response (HTTP {status_code}) ---\n",
            _format_payload(resp_body_raw, 4000),
            f"\n{rule}\n",
        ))
        # Explicit UTF-8: the record holds non-ASCII text (the em dash and
        # ensure_ascii=False JSON), which a non-UTF-8 locale default would
        # reject. backslashreplace keeps lone surrogates from aborting the dump.
        with open(ERROR_LOG, "a", encoding="utf-8", errors="backslashreplace") as f:
            f.write(record)
        print(f"[{ts}] ERROR DUMP: HTTP {status_code} on {path} — full payload written to {ERROR_LOG}")
    except Exception as e:
        # Diagnostics must never break the proxied request itself.
        print(f"_dump_error failed: {e}")
@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS"]) @app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS"])
async def proxy(path: str, request: Request): async def proxy(path: str, request: Request):
body = await request.body() body = await request.body()
@@ -123,6 +165,17 @@ async def proxy(path: str, request: Request):
req = client.build_request(request.method, url, content=body, headers=fwd_headers) req = client.build_request(request.method, url, content=body, headers=fwd_headers)
resp = await client.send(req, stream=True) resp = await client.send(req, stream=True)
# Dump on error for streaming responses
if resp.status_code >= 400:
error_body = await resp.aread()
_dump_error(body, resp.status_code, resp_headers=dict(resp.headers), resp_body_raw=error_body, path=path)
await resp.aclose()
return Response(
content=error_body,
status_code=resp.status_code,
media_type=resp.headers.get("content-type"),
)
async def stream_body(): async def stream_body():
try: try:
async for chunk in resp.aiter_bytes(): async for chunk in resp.aiter_bytes():
@@ -137,6 +190,11 @@ async def proxy(path: str, request: Request):
) )
else: else:
resp = await client.request(request.method, url, content=body, headers=fwd_headers) resp = await client.request(request.method, url, content=body, headers=fwd_headers)
# Dump on error
if resp.status_code >= 400:
_dump_error(body, resp.status_code, resp_headers=dict(resp.headers), resp_body_raw=resp.content, path=path)
return Response( return Response(
content=resp.content, content=resp.content,
status_code=resp.status_code, status_code=resp.status_code,