use a shim

This commit is contained in:
2026-04-12 02:19:55 +00:00
parent c86fbe0166
commit 4d444bebbb
3 changed files with 135 additions and 3 deletions

View File

@@ -1,6 +1,25 @@
FROM lmsysorg/sglang-rocm:v0.5.10rc0-rocm700-mi30x-20260411

# Container entrypoint wrapper. --chmod sets the executable bit in the same
# layer as the copy, so no follow-up `RUN chmod` layer is needed.
COPY --chmod=755 entrypoint.sh /entrypoint.sh

# Replace the vllm binary with our shim so that, no matter how the
# production stack invokes `vllm`, we intercept it.
COPY --chmod=755 vllm-shim.sh /usr/local/bin/vllm

# Also handle `python -m vllm`, `python -m vllm.entrypoints.openai.api_server`
# and `python -m vllm.entrypoints.cli.main` by shadowing the vllm package.
# COPY creates any missing destination directories, so no `mkdir -p` is needed.
COPY vllm_shim_module.py /opt/vllm-shim/vllm/__main__.py
COPY vllm_shim_module.py /opt/vllm-shim/vllm/entrypoints/openai/api_server.py
COPY vllm_shim_module.py /opt/vllm-shim/vllm/entrypoints/cli/main.py

# Empty __init__.py files turn the shim directories into regular packages.
RUN touch /opt/vllm-shim/vllm/__init__.py \
          /opt/vllm-shim/vllm/entrypoints/__init__.py \
          /opt/vllm-shim/vllm/entrypoints/openai/__init__.py \
          /opt/vllm-shim/vllm/entrypoints/cli/__init__.py

# Prepend the shim to PYTHONPATH so it shadows any real vllm install.
# The `:+` form only appends ":<old value>" when PYTHONPATH is already set;
# a bare "/opt/vllm-shim:" would leave an empty entry, which Python treats
# as the current working directory.
ENV PYTHONPATH="/opt/vllm-shim${PYTHONPATH:+:$PYTHONPATH}"

# GPU / collective-communication runtime settings exported to every process
# in the container.
# NOTE(review): values are carried over unchanged; confirm they are still the
# intended tuning for the target hardware.
ENV HIP_FORCE_DEV_KERNARG=1 \
    NCCL_MIN_NCHANNELS=112 \
    GPU_MAX_HW_QUEUES=2

ENTRYPOINT ["/entrypoint.sh"]

49
vllm-shim.sh Normal file
View File

@@ -0,0 +1,49 @@
#!/bin/bash
set -euo pipefail
# ============================================================
# vLLM -> SGLang shim
# This script replaces the vllm binary. The k8s production stack
# calls `vllm serve <model> [flags]`, and we intercept everything.
#
# Only --host / --port (both `--flag value` and `--flag=value`
# forms) are honoured; every other argument, including the model
# name, is logged and then ignored. The SGLang launch parameters
# below are fixed.
# ============================================================

# Log exactly what we were called with, for debugging the caller.
echo ""
echo "=========================================="
echo " vLLM -> SGLang Shim"
echo "=========================================="
echo " Invoked as: vllm $*"
echo ""
echo " All arguments received:"
i=1
for arg in "$@"; do
    echo " [$i] $arg"
    i=$((i + 1))
done
echo "=========================================="
echo ""

# Defaults
HOST="0.0.0.0"
PORT="8000"

# Parse host and port from whatever the stack sends.
while [[ $# -gt 0 ]]; do
    case "$1" in
        serve)
            shift ;;  # skip the 'serve' subcommand
        --host)
            # Under `set -u`, touching "$2" when the flag is the last
            # argument would abort the script; keep the default instead.
            if [[ $# -ge 2 ]]; then
                HOST="$2"
                shift 2
            else
                echo "WARNING: --host given without a value; keeping ${HOST}" >&2
                shift
            fi
            ;;
        --host=*)
            HOST="${1#*=}"
            shift ;;
        --port)
            if [[ $# -ge 2 ]]; then
                PORT="$2"
                shift 2
            else
                echo "WARNING: --port given without a value; keeping ${PORT}" >&2
                shift
            fi
            ;;
        --port=*)
            PORT="${1#*=}"
            shift ;;
        *)
            shift ;;  # ignore everything else
    esac
done

echo "Launching SGLang on ${HOST}:${PORT}"
echo ""

# exec replaces this shell so the SGLang server receives signals directly.
exec python -m sglang.launch_server \
    --model-path mistralai/Devstral-2-123B-Instruct-2512 \
    --host "$HOST" \
    --port "$PORT" \
    --tp 8 \
    --tool-call-parser mistral

64
vllm_shim_module.py Normal file
View File

@@ -0,0 +1,64 @@
"""
vLLM -> SGLang Python shim.
Catches `python -m vllm.entrypoints.openai.api_server` (and similar)
and launches SGLang instead.
"""
import os
import sys
import subprocess
def _parse_host_port(args, host="0.0.0.0", port="8000"):
    """Extract --host/--port from ``args``; every other argument is ignored.

    Both ``--flag value`` and ``--flag=value`` forms are accepted. A trailing
    ``--host``/``--port`` with no value is skipped and the current value kept.
    Returns a ``(host, port)`` tuple of strings.
    """
    i = 0
    while i < len(args):
        arg = args[i]
        has_value = i + 1 < len(args)
        if arg == "--host" and has_value:
            host = args[i + 1]
            i += 2
        elif arg.startswith("--host="):
            host = arg.split("=", 1)[1]
            i += 1
        elif arg == "--port" and has_value:
            port = args[i + 1]
            i += 2
        elif arg.startswith("--port="):
            port = arg.split("=", 1)[1]
            i += 1
        else:
            i += 1
    return host, port


def main():
    """Log the received vLLM arguments, then replace this process with SGLang.

    Reads ``sys.argv[1:]``, prints a banner listing every argument, takes the
    host and port from them (defaults ``0.0.0.0`` / ``8000``) and calls
    ``os.execvp`` to run ``sglang.launch_server`` with the current
    interpreter. On success this function never returns; if the exec fails,
    ``os.execvp`` raises ``OSError``.
    """
    args = sys.argv[1:]

    # Under `python -m pkg.mod`, __name__ is always "__main__"; the module
    # spec still carries the real dotted name, which is what we want to log.
    spec = globals().get("__spec__")
    module_name = spec.name if spec is not None else __name__
    if module_name.endswith(".__main__"):
        module_name = module_name[: -len(".__main__")]

    print()
    print("==========================================")
    print(" vLLM -> SGLang Shim (Python module)")
    print("==========================================")
    print(f" Invoked as: python -m {module_name} {' '.join(args)}")
    print()
    print(" All arguments received:")
    for i, arg in enumerate(args, 1):
        print(f" [{i}] {arg}")
    print("==========================================")
    print()

    host, port = _parse_host_port(args)

    print(f"Launching SGLang on {host}:{port}")
    print()

    # exec replaces the process image without flushing Python's buffered
    # streams; when stdout is a pipe the banner above would be lost.
    sys.stdout.flush()
    sys.stderr.flush()

    os.execvp(
        sys.executable,
        [
            sys.executable, "-m", "sglang.launch_server",
            "--model-path", "mistralai/Devstral-2-123B-Instruct-2512",
            "--host", host,
            "--port", port,
            "--tp", "8",
            "--tool-call-parser", "mistral",
        ],
    )
# Launch SGLang both when this file is executed (`python -m ...`) and when it
# is merely imported, since some invocation paths just import the module.
# main() replaces the process on success and raises on failure, so one
# unconditional call covers both paths; a separate `__main__` guard followed
# by a second call would only matter if main() ever returned.
# NOTE(review): because this runs at import time, any code that imports one of
# the shadowed vllm modules will exec a fresh SGLang server -- confirm the
# launched SGLang process never imports them.
main()