diff --git a/vllm_middleware.py b/vllm_middleware.py
index 2c1a66b..d4caffc 100644
--- a/vllm_middleware.py
+++ b/vllm_middleware.py
@@ -18,6 +18,7 @@ import json
 import os
 import asyncio
 import httpx
+from datetime import datetime
 from fastapi import FastAPI, Request
 from fastapi.responses import StreamingResponse, Response
 import uvicorn
@@ -87,6 +88,47 @@ async def health():
         return Response(content="SGLang not ready", status_code=503)
 
 
+ERROR_LOG = os.environ.get("VLLM_SHIM_LOG", "/tmp/vllm-shim.log")  # Error-dump destination; override with the VLLM_SHIM_LOG env var.
+
+
+def _format_payload(raw: bytes, limit: int) -> str:
+    """Return `raw` as indented JSON if it parses, else as lossy UTF-8 text, cut to `limit` characters."""
+    try:
+        # Parse the complete payload; truncating before parsing would break any large JSON body.
+        text = json.dumps(json.loads(raw), indent=2, ensure_ascii=False)
+    except ValueError:  # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses
+        text = raw.decode("utf-8", errors="replace")
+    return text[:limit]
+
+
+def _dump_error(request_body: bytes, status_code: int, resp_headers: dict, resp_body_raw: bytes, path: str = ""):
+    """Append the request and response payloads of a failed SGLang call to ERROR_LOG.
+
+    Best-effort: any failure is printed, never raised. The request is capped at
+    8000 characters and the response at 4000. `resp_headers` is accepted but is
+    not written to the log.
+    """
+    try:
+        ts = datetime.now().isoformat()
+        # Render both payloads before opening the file so a failure leaves no partial record.
+        request_text = _format_payload(request_body, 8000)
+        response_text = _format_payload(resp_body_raw, 4000)
+        # Explicit UTF-8: the banner and ensure_ascii=False JSON contain non-ASCII characters.
+        with open(ERROR_LOG, "a", encoding="utf-8") as f:
+            f.write(f"\n{'='*60}\n")
+            f.write(f"[{ts}] ERROR DUMP — SGLang returned HTTP {status_code}\n")
+            f.write(f"Path: {path}\n")
+            f.write("--- Request Body ---\n")
+            f.write(request_text)
+            f.write(f"\n--- Response (HTTP {status_code}) ---\n")
+            f.write(response_text)
+            f.write(f"\n{'='*60}\n")
+
+        print(f"[{ts}] ERROR DUMP: HTTP {status_code} on {path} — full payload written to {ERROR_LOG}")
+    except Exception as e:
+        print(f"_dump_error failed: {e}")
+
+
 @app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS"])
 async def proxy(path: str, request: Request):
     body = await request.body()
@@ -123,6 +165,17 @@ async def proxy(path: str, request: Request):
             req = client.build_request(request.method, url, content=body, headers=fwd_headers)
             resp = await client.send(req, stream=True)
 
+            # Dump on error for streaming responses
+            if resp.status_code >= 400:
+                error_body = await resp.aread()
+                _dump_error(body, resp.status_code, resp_headers=dict(resp.headers), resp_body_raw=error_body, path=path)
+                await resp.aclose()
+                return Response(
+                    content=error_body,
+                    status_code=resp.status_code,
+                    media_type=resp.headers.get("content-type"),
+                )
+
             async def stream_body():
                 try:
                     async for chunk in resp.aiter_bytes():
@@ -137,6 +190,11 @@ async def proxy(path: str, request: Request):
             )
         else:
             resp = await client.request(request.method, url, content=body, headers=fwd_headers)
+
+            # Dump on error
+            if resp.status_code >= 400:
+                _dump_error(body, resp.status_code, resp_headers=dict(resp.headers), resp_body_raw=resp.content, path=path)
+
             return Response(
                 content=resp.content,
                 status_code=resp.status_code,