fix: haproxy 2.4 compat — use errorfile instead of http-request return
haproxy 2.4 (as shipped with Ubuntu 22.04) doesn't support the payload/content-type syntax of `http-request return` (that requires 2.8+). Switch to errorfile-based stub responses instead: `http-request deny deny_status N` paired with `errorfile N <path>`.
This commit is contained in:
24
vllm-shim.sh
24
vllm-shim.sh
@@ -12,6 +12,9 @@ set -euo pipefail
|
||||
# /health → 200 if SGLang backend is up, 503 if not (instant)
|
||||
# /* → proxy to SGLang on port+1
|
||||
# SGLang on port+1 (internal)
|
||||
#
|
||||
# haproxy 2.4 compat: uses errorfile for stub responses instead
|
||||
# of http-request return (which needs 2.8+ for payload syntax).
|
||||
# ============================================================
|
||||
|
||||
echo ""
|
||||
@@ -66,11 +69,16 @@ echo "Launching SGLang on ${HOST}:${SGLANG_PORT} (internal)"
|
||||
echo "Launching haproxy on ${HOST}:${PORT} (front door, /metrics + /health stub)"
|
||||
echo ""
|
||||
|
||||
# Write haproxy config
|
||||
# Prepare error files for haproxy stub responses
|
||||
# haproxy errorfile format: HTTP/1.x status_code reason\r\nheaders\r\n\r\nbody
|
||||
mkdir -p /tmp/haproxy-errors
|
||||
# Empty-body 200 response. The haproxy config below maps it via
# "errorfile 200" so that "http-request deny deny_status 200" answers
# the /metrics stub (and /health while SGLang is up) with a bare 200.
printf "HTTP/1.0 200 OK\r\nContent-Length: 0\r\nConnection: close\r\n\r\n" > /tmp/haproxy-errors/200-empty.http
|
||||
# 503 response returned for /health while the SGLang backend is down
# (wired up via "errorfile 503" in the haproxy config below).
# Content-Length must match the body's byte count exactly:
# "SGLang not ready" is 16 bytes, so declaring 15 would cut off the
# final character or make the raw response malformed.
printf "HTTP/1.0 503 Service Unavailable\r\nContent-Length: 16\r\nConnection: close\r\nContent-Type: text/plain\r\n\r\nSGLang not ready" > /tmp/haproxy-errors/503-sglang.http
|
||||
|
||||
# Write haproxy config (compatible with haproxy 2.4)
|
||||
HAPROXY_CFG="/tmp/haproxy-shim.cfg"
|
||||
cat > "$HAPROXY_CFG" <<EOF
|
||||
global
|
||||
log /dev/log local0
|
||||
maxconn 4096
|
||||
|
||||
defaults
|
||||
@@ -82,15 +90,19 @@ defaults
|
||||
frontend proxy
|
||||
bind ${HOST}:${PORT}
|
||||
|
||||
# /metrics stub — instant 200 empty (vLLm stack expects this)
|
||||
http-request return status 200 content-type text/plain "" if { path /metrics }
|
||||
# /metrics stub — instant 200 empty (vLLM stack expects this)
|
||||
acl is_metrics path /metrics
|
||||
http-request deny deny_status 200 if is_metrics
|
||||
errorfile 200 /tmp/haproxy-errors/200-empty.http
|
||||
|
||||
# /health — instant response based on SGLang backend state
|
||||
# haproxy health-checks SGLang in the background; this avoids
|
||||
# the 1s k8s probe timeout racing SGLang's ~1.001s /health response
|
||||
acl is_health path /health
|
||||
acl sglang_up nbsrv(sglang) gt 0
|
||||
http-request return status 200 content-type text/plain "" if { path /health } sglang_up
|
||||
http-request return status 503 content-type text/plain "SGLang not ready" if { path /health }
|
||||
http-request deny deny_status 200 if is_health sglang_up
|
||||
http-request deny deny_status 503 if is_health
|
||||
errorfile 503 /tmp/haproxy-errors/503-sglang.http
|
||||
|
||||
default_backend sglang
|
||||
|
||||
|
||||
Reference in New Issue
Block a user