fix: haproxy 2.4 compat — use errorfile instead of http-request return

haproxy 2.4 (Ubuntu 22.04) doesn't support http-request return with
payload/content-type syntax (that's 2.8+). Switch to errorfile-based
stub responses: http-request deny deny_status N + errorfile N path.
This commit is contained in:
2026-04-12 17:26:45 +00:00
parent 1ddc08c88b
commit 725e61d792
2 changed files with 37 additions and 11 deletions

View File

@@ -12,6 +12,9 @@ set -euo pipefail
# /health → 200 if SGLang backend is up, 503 if not (instant) # /health → 200 if SGLang backend is up, 503 if not (instant)
# /* → proxy to SGLang on port+1 # /* → proxy to SGLang on port+1
# SGLang on port+1 (internal) # SGLang on port+1 (internal)
#
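# haproxy 2.4 compat: uses errorfile for stub responses instead
# of http-request return (which needs 2.8+ for payload syntax).
# NOTE(review): the "2.8+" figure is unverified — the HAProxy manual lists
# http-request return from 2.2 on; the old rule may only have been missing
# the `string` keyword before its payload. Confirm before relying on it.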
# ============================================================ # ============================================================
echo "" echo ""
@@ -66,11 +69,16 @@ echo "Launching SGLang on ${HOST}:${SGLANG_PORT} (internal)"
echo "Launching haproxy on ${HOST}:${PORT} (front door, /metrics + /health stub)" echo "Launching haproxy on ${HOST}:${PORT} (front door, /metrics + /health stub)"
echo "" echo ""
# Write haproxy config # Prepare error files for haproxy stub responses
# haproxy errorfile format: HTTP/1.x status_code reason\r\nheaders\r\n\r\nbody
# Stub responses served through haproxy's errorfile directive. Each file is a
# complete raw HTTP response: status line, headers, blank line, then the body,
# with CRLF line endings.
mkdir -p /tmp/haproxy-errors
printf 'HTTP/1.0 200 OK\r\nContent-Length: 0\r\nConnection: close\r\n\r\n' > /tmp/haproxy-errors/200-empty.http
# Content-Length must equal the body's byte length (16 for this ASCII body);
# derive it from the body so the header and payload cannot drift apart.
NOT_READY_BODY="SGLang not ready"
printf 'HTTP/1.0 503 Service Unavailable\r\nContent-Length: %d\r\nConnection: close\r\nContent-Type: text/plain\r\n\r\n%s' "${#NOT_READY_BODY}" "$NOT_READY_BODY" > /tmp/haproxy-errors/503-sglang.http
# Write haproxy config (compatible with haproxy 2.4)
HAPROXY_CFG="/tmp/haproxy-shim.cfg" HAPROXY_CFG="/tmp/haproxy-shim.cfg"
cat > "$HAPROXY_CFG" <<EOF cat > "$HAPROXY_CFG" <<EOF
global global
log /dev/log local0
maxconn 4096 maxconn 4096
defaults defaults
@@ -82,15 +90,19 @@ defaults
frontend proxy frontend proxy
bind ${HOST}:${PORT} bind ${HOST}:${PORT}
# /metrics stub — instant 200 empty (vLLm stack expects this) # /metrics stub — instant 200 empty (vLLM stack expects this)
http-request return status 200 content-type text/plain "" if { path /metrics } acl is_metrics path /metrics
http-request deny deny_status 200 if is_metrics
errorfile 200 /tmp/haproxy-errors/200-empty.http
# /health — instant response based on SGLang backend state # /health — instant response based on SGLang backend state
# haproxy health-checks SGLang in the background; this avoids # haproxy health-checks SGLang in the background; this avoids
# the 1s k8s probe timeout racing SGLang's ~1.001s /health response # the 1s k8s probe timeout racing SGLang's ~1.001s /health response
acl is_health path /health
acl sglang_up nbsrv(sglang) gt 0 acl sglang_up nbsrv(sglang) gt 0
http-request return status 200 content-type text/plain "" if { path /health } sglang_up http-request deny deny_status 200 if is_health sglang_up
http-request return status 503 content-type text/plain "SGLang not ready" if { path /health } http-request deny deny_status 503 if is_health
errorfile 503 /tmp/haproxy-errors/503-sglang.http
default_backend sglang default_backend sglang

View File

@@ -9,6 +9,9 @@ Architecture:
/health → 200 if SGLang backend is up, 503 if not (instant) /health → 200 if SGLang backend is up, 503 if not (instant)
/* → proxy to SGLang on port+1 /* → proxy to SGLang on port+1
SGLang on port+1 (internal) SGLang on port+1 (internal)
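haproxy 2.4 compat: uses errorfile for stub responses instead
of http-request return (which needs 2.8+ for payload syntax).
NOTE(review): the "2.8+" figure is unverified — the HAProxy manual lists
http-request return from 2.2 on; the old rule may only have been missing
the `string` keyword before its payload. Confirm before relying on it.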
""" """
import os import os
import sys import sys
@@ -67,11 +70,18 @@ def main():
print(f"Launching haproxy on {host}:{port} (front door, /metrics + /health stub)") print(f"Launching haproxy on {host}:{port} (front door, /metrics + /health stub)")
print() print()
# Write haproxy config # Prepare error files for haproxy stub responses
# haproxy errorfile format: HTTP/1.x status_code reason\r\nheaders\r\n\r\nbody
# Stub responses served through haproxy's errorfile directive. Each file is a
# complete raw HTTP response: status line, headers, blank line, then the body.
os.makedirs("/tmp/haproxy-errors", exist_ok=True)
# newline="" disables newline translation so the CRLF sequences are written
# exactly as spelled here.
with open("/tmp/haproxy-errors/200-empty.http", "w", newline="") as f:
    f.write("HTTP/1.0 200 OK\r\nContent-Length: 0\r\nConnection: close\r\n\r\n")
# Content-Length must equal the body's byte length (16 for this body); derive
# it from the body so the header and payload cannot drift apart.
not_ready_body = "SGLang not ready"
with open("/tmp/haproxy-errors/503-sglang.http", "w", newline="") as f:
    f.write(
        "HTTP/1.0 503 Service Unavailable\r\n"
        f"Content-Length: {len(not_ready_body.encode())}\r\n"
        "Connection: close\r\n"
        "Content-Type: text/plain\r\n"
        "\r\n"
        f"{not_ready_body}"
    )
# Write haproxy config (compatible with haproxy 2.4)
haproxy_cfg = "/tmp/haproxy-shim.cfg" haproxy_cfg = "/tmp/haproxy-shim.cfg"
with open(haproxy_cfg, "w") as f: with open(haproxy_cfg, "w") as f:
f.write(f"""global f.write(f"""global
log /dev/log local0
maxconn 4096 maxconn 4096
defaults defaults
@@ -84,14 +94,18 @@ frontend proxy
bind {host}:{port} bind {host}:{port}
# /metrics stub — instant 200 empty (vLLM stack expects this) # /metrics stub — instant 200 empty (vLLM stack expects this)
http-request return status 200 content-type text/plain "" if {{ path /metrics }} acl is_metrics path /metrics
http-request deny deny_status 200 if is_metrics
errorfile 200 /tmp/haproxy-errors/200-empty.http
# /health — instant response based on SGLang backend state # /health — instant response based on SGLang backend state
# haproxy health-checks SGLang in the background; this avoids # haproxy health-checks SGLang in the background; this avoids
# the 1s k8s probe timeout racing SGLang's ~1.001s /health response # the 1s k8s probe timeout racing SGLang's ~1.001s /health response
acl is_health path /health
acl sglang_up nbsrv(sglang) gt 0 acl sglang_up nbsrv(sglang) gt 0
http-request return status 200 content-type text/plain "" if {{ path /health }} sglang_up http-request deny deny_status 200 if is_health sglang_up
http-request return status 503 content-type text/plain "SGLang not ready" if {{ path /health }} http-request deny deny_status 503 if is_health
errorfile 503 /tmp/haproxy-errors/503-sglang.http
default_backend sglang default_backend sglang