use a shim

2026-04-12 02:19:55 +00:00
parent c86fbe0166
commit 4d444bebbb
3 changed files with 135 additions and 3 deletions
--- a/vllm-shim.sh
+++ b/vllm-shim.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+set -euo pipefail
+
+# ============================================================
+# vLLM -> SGLang shim
+# This script replaces the vllm binary. The k8s production stack
+# calls `vllm serve <model> [flags]`, and we intercept everything.
+# ============================================================
+
+echo ""
+echo "=========================================="
+echo "  vLLM -> SGLang Shim"
+echo "=========================================="
+echo "  Invoked as: vllm $*"
+echo ""
+echo "  All arguments received:"
+i=1
+for arg in "$@"; do
+  echo "    [$i] $arg"
+  i=$((i + 1))
+done
+echo "=========================================="
+echo ""
+
+# Defaults
+HOST="0.0.0.0"
+PORT="8000"
+
+# Parse host and port from whatever the stack sends
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    serve)        shift ;;  # skip the 'serve' subcommand
+    --host)       HOST="$2"; shift 2 ;;
+    --host=*)     HOST="${1#*=}"; shift ;;
+    --port)       PORT="$2"; shift 2 ;;
+    --port=*)     PORT="${1#*=}"; shift ;;
+    *)            shift ;;  # ignore everything else
+  esac
+done
+
+echo "Launching SGLang on ${HOST}:${PORT}"
+echo ""
+
+exec python -m sglang.launch_server \
+  --model-path mistralai/Devstral-2-123B-Instruct-2512 \
+  --host "$HOST" \
+  --port "$PORT" \
+  --tp 8 \
+  --tool-call-parser mistral