use a shim

2026-04-12 02:19:55 +00:00
parent c86fbe0166
commit 4d444bebbb
3 changed files with 135 additions and 3 deletions
--- a/25
+++ b/25
@@ -1,6 +1,25 @@
 FROM lmsysorg/sglang-rocm:v0.5.10rc0-rocm700-mi30x-20260411

-COPY entrypoint.sh /entrypoint.sh
-RUN chmod +x /entrypoint.sh
+# Replace the vllm binary with our shim so no matter how the
+# production stack invokes vllm, we intercept it
+COPY vllm-shim.sh /usr/local/bin/vllm
+RUN chmod +x /usr/local/bin/vllm

-ENTRYPOINT ["/entrypoint.sh"]
+# Also handle `python -m vllm.entrypoints.openai.api_server` and
+# `python -m vllm.entrypoints.cli.main` by shadowing the vllm package
+RUN mkdir -p /opt/vllm-shim/vllm/entrypoints/openai \
+             /opt/vllm-shim/vllm/entrypoints/cli
+COPY vllm_shim_module.py /opt/vllm-shim/vllm/__main__.py
+COPY vllm_shim_module.py /opt/vllm-shim/vllm/entrypoints/openai/api_server.py
+COPY vllm_shim_module.py /opt/vllm-shim/vllm/entrypoints/cli/main.py
+RUN touch /opt/vllm-shim/vllm/__init__.py \
+          /opt/vllm-shim/vllm/entrypoints/__init__.py \
+          /opt/vllm-shim/vllm/entrypoints/openai/__init__.py \
+          /opt/vllm-shim/vllm/entrypoints/cli/__init__.py
+
+# Prepend shim so it shadows any real vllm install; ':+' avoids a trailing ':' (an empty entry = cwd on sys.path) if PYTHONPATH is unset
+ENV PYTHONPATH="/opt/vllm-shim${PYTHONPATH:+:$PYTHONPATH}"
+
+ENV HIP_FORCE_DEV_KERNARG=1
+ENV NCCL_MIN_NCHANNELS=112
+ENV GPU_MAX_HW_QUEUES=2
--- a/vllm-shim.sh
+++ b/vllm-shim.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+set -euo pipefail
+
+# ============================================================
+# vLLM -> SGLang shim. Installed in place of the `vllm` binary: the k8s
+# production stack calls `vllm serve <model> [flags]`; only --host/--port
+# are honoured, the model and every other SGLang flag are fixed below.
+# ============================================================
+
+echo ""
+echo "=========================================="
+echo "  vLLM -> SGLang Shim"
+echo "=========================================="
+echo "  Invoked as: vllm $*"
+echo ""
+echo "  All arguments received:"
+i=1
+for arg in "$@"; do
+  echo "    [$i] $arg"
+  i=$((i + 1))
+done
+echo "=========================================="
+echo ""
+
+# Defaults, kept when --host/--port is absent or is the last argument with no value
+HOST="0.0.0.0"
+PORT="8000"
+
+# Parse host and port; a value-less trailing flag is ignored instead of tripping `set -u` on $2
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    serve)        shift ;;  # skip the 'serve' subcommand
+    --host)       if [[ $# -ge 2 ]]; then HOST="$2"; shift; fi; shift ;;
+    --host=*)     HOST="${1#*=}"; shift ;;
+    --port)       if [[ $# -ge 2 ]]; then PORT="$2"; shift; fi; shift ;;
+    --port=*)     PORT="${1#*=}"; shift ;;
+    *)            shift ;;  # ignore everything else
+  esac
+done
+
+echo "Launching SGLang on ${HOST}:${PORT}"
+echo ""
+
+exec python -m sglang.launch_server \
+  --model-path mistralai/Devstral-2-123B-Instruct-2512 \
+  --host "$HOST" \
+  --port "$PORT" \
+  --tp 8 \
+  --tool-call-parser mistral
--- a/vllm_shim_module.py
+++ b/vllm_shim_module.py
@@ -0,0 +1,64 @@
+"""
+vLLM -> SGLang Python shim.
+Catches `python -m vllm.entrypoints.openai.api_server` (and similar)
+and launches SGLang instead.
+"""
+import os
+import sys
+import subprocess
+
+def main():
+    """Print the intercepted vLLM command line, then exec SGLang in its place.
+
+    Only --host/--port (space- or "="-separated) are read from sys.argv; every
+    other argument is ignored. Does not return when the exec succeeds.
+    """
+    args = sys.argv[1:]
+    # Under `python -m`, __name__ is "__main__"; __spec__.name is the real module path.
+    module = __spec__.name if __spec__ is not None else __name__
+
+    print()
+    print("==========================================")
+    print("  vLLM -> SGLang Shim (Python module)")
+    print("==========================================")
+    print(f"  Invoked as: python -m {module} {' '.join(args)}")
+    print()
+    print("  All arguments received:")
+    for i, arg in enumerate(args, 1):
+        print(f"    [{i}] {arg}")
+    print("==========================================")
+    print()
+
+    host = "0.0.0.0"
+    port = "8000"
+
+    i = 0
+    while i < len(args):
+        flag, sep, value = args[i].partition("=")
+        if flag in ("--host", "--port"):
+            if not sep and i + 1 < len(args):
+                i += 1  # space-separated form: the value is the next argument
+                value, sep = args[i], "="
+            if sep and flag == "--host":
+                host = value
+            elif sep:
+                port = value
+        i += 1
+
+    print(f"Launching SGLang on {host}:{port}")
+    print()
+    sys.stdout.flush()  # exec does not flush Python's buffers; without this the banner is lost on a pipe
+    os.execvp(sys.executable, [
+        sys.executable, "-m", "sglang.launch_server",
+        "--model-path", "mistralai/Devstral-2-123B-Instruct-2512",
+        "--host", host,
+        "--port", port,
+        "--tp", "8",
+        "--tool-call-parser", "mistral",
+    ])
+
+if __name__ == "__main__":
+    main()
+
+# Unconditional call: also fires on plain import (some invocation paths just import the file)
+main()