use a shim
This commit is contained in:
25
Dockerfile
25
Dockerfile
@@ -1,6 +1,25 @@
|
|||||||
FROM lmsysorg/sglang-rocm:v0.5.10rc0-rocm700-mi30x-20260411
|
# Base image: the SGLang ROCm image, referenced by an explicit tag (not :latest).
FROM lmsysorg/sglang-rocm:v0.5.10rc0-rocm700-mi30x-20260411
|
||||||
|
|
||||||
COPY entrypoint.sh /entrypoint.sh
|
# Replace the vllm binary with our shim so no matter how the
|
||||||
RUN chmod +x /entrypoint.sh
|
# production stack invokes vllm, we intercept it
|
||||||
|
# Install the shim under the name `vllm`. --chmod sets the executable bits in
# the COPY layer itself, so no follow-up `RUN chmod` layer is needed
# (requires BuildKit).
COPY --chmod=0755 vllm-shim.sh /usr/local/bin/vllm
|
||||||
|
|
||||||
ENTRYPOINT ["/entrypoint.sh"]
|
# Shadow the `vllm` Python package so that module-style launches such as
# `python -m vllm.entrypoints.openai.api_server` and
# `python -m vllm.entrypoints.cli.main` land in the shim as well.
#
# First lay out the package skeleton: every directory gets an empty
# __init__.py so the tree is importable as a regular package.
RUN mkdir -p \
        /opt/vllm-shim/vllm/entrypoints/cli \
        /opt/vllm-shim/vllm/entrypoints/openai \
    && touch \
        /opt/vllm-shim/vllm/__init__.py \
        /opt/vllm-shim/vllm/entrypoints/__init__.py \
        /opt/vllm-shim/vllm/entrypoints/cli/__init__.py \
        /opt/vllm-shim/vllm/entrypoints/openai/__init__.py

# The same shim source is installed at each module path that may be invoked.
COPY vllm_shim_module.py /opt/vllm-shim/vllm/__main__.py
COPY vllm_shim_module.py /opt/vllm-shim/vllm/entrypoints/cli/main.py
COPY vllm_shim_module.py /opt/vllm-shim/vllm/entrypoints/openai/api_server.py
|
||||||
|
|
||||||
|
# Prepend shim to PYTHONPATH so it shadows any real vllm install.
# The inherited value is appended only when it is set and non-empty: a plain
# "/opt/vllm-shim:${PYTHONPATH}" would end in a bare ":" when PYTHONPATH is
# unset, and that empty entry puts the current working directory on the
# import path.
ENV PYTHONPATH="/opt/vllm-shim${PYTHONPATH:+:${PYTHONPATH}}"
|
||||||
|
|
||||||
|
# Runtime environment settings baked into the image, grouped in a single
# instruction; values are unchanged.
ENV HIP_FORCE_DEV_KERNARG=1 \
    NCCL_MIN_NCHANNELS=112 \
    GPU_MAX_HW_QUEUES=2
|
||||||
49
vllm-shim.sh
Normal file
49
vllm-shim.sh
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
#!/bin/bash
set -euo pipefail

# ============================================================
#  vLLM -> SGLang shim
#  This script replaces the vllm binary. The k8s production stack
#  calls `vllm serve <model> [flags]`, and we intercept everything.
#
#  Only --host and --port are honoured (as `--flag value` or
#  `--flag=value`). Every other argument, including the model name,
#  is logged and then dropped; the model, tensor-parallel size and
#  tool-call parser are fixed in the exec line at the bottom.
# ============================================================

# Log exactly what we were called with, one argument per line.
echo ""
echo "=========================================="
echo " vLLM -> SGLang Shim"
echo "=========================================="
echo " Invoked as: vllm $*"
echo ""
echo " All arguments received:"
i=1
for arg in "$@"; do
    echo " [$i] $arg"
    i=$((i + 1))
done
echo "=========================================="
echo ""

# Defaults
HOST="0.0.0.0"
PORT="8000"

# Parse host and port from whatever the stack sends
while [[ $# -gt 0 ]]; do
    case "$1" in
        serve)
            shift ;;                # skip the 'serve' subcommand
        --host)
            # A trailing `--host` with no value is ignored instead of
            # tripping `set -u` on an unbound $2 (and a failing `shift 2`).
            if [[ $# -ge 2 ]]; then
                HOST="$2"
                shift 2
            else
                shift
            fi
            ;;
        --host=*)
            HOST="${1#*=}"
            shift ;;
        --port)
            # Same guard as --host: keep the default when no value follows.
            if [[ $# -ge 2 ]]; then
                PORT="$2"
                shift 2
            else
                shift
            fi
            ;;
        --port=*)
            PORT="${1#*=}"
            shift ;;
        *)
            shift ;;                # ignore everything else
    esac
done

echo "Launching SGLang on ${HOST}:${PORT}"
echo ""

# exec so the SGLang server replaces this shell rather than running as its child.
exec python -m sglang.launch_server \
    --model-path mistralai/Devstral-2-123B-Instruct-2512 \
    --host "$HOST" \
    --port "$PORT" \
    --tp 8 \
    --tool-call-parser mistral
|
||||||
64
vllm_shim_module.py
Normal file
64
vllm_shim_module.py
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
"""
|
||||||
|
vLLM -> SGLang Python shim.
|
||||||
|
Catches `python -m vllm.entrypoints.openai.api_server` (and similar)
|
||||||
|
and launches SGLang instead.
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
def main():
    """Log the intercepted vLLM invocation, then replace this process with SGLang.

    Reads ``sys.argv[1:]``. Only ``--host`` and ``--port`` are honoured, in
    either ``--flag value`` or ``--flag=value`` form; every other argument is
    printed and then ignored. A ``--host``/``--port`` that is the final
    argument (no value following) is ignored and the default is kept.

    Does not return on success: ``os.execvp`` replaces the current process
    with ``sglang.launch_server`` run by the same interpreter.

    Raises:
        OSError: if the exec itself fails.
    """
    args = sys.argv[1:]

    print()
    print("==========================================")
    print(" vLLM -> SGLang Shim (Python module)")
    print("==========================================")
    print(f" Invoked as: python -m {__name__} {' '.join(args)}")
    print()
    print(" All arguments received:")
    for i, arg in enumerate(args, 1):
        print(f" [{i}] {arg}")
    print("==========================================")
    print()

    # Defaults used when the caller does not pass --host / --port.
    host = "0.0.0.0"
    port = "8000"

    i = 0
    while i < len(args):
        if args[i] == "--host" and i + 1 < len(args):
            host = args[i + 1]
            i += 2
        elif args[i].startswith("--host="):
            host = args[i].split("=", 1)[1]
            i += 1
        elif args[i] == "--port" and i + 1 < len(args):
            port = args[i + 1]
            i += 2
        elif args[i].startswith("--port="):
            port = args[i].split("=", 1)[1]
            i += 1
        else:
            # Anything else (subcommand, model name, unknown flags) is dropped.
            i += 1

    print(f"Launching SGLang on {host}:{port}")
    print()

    # exec replaces the process image without flushing Python's buffered
    # streams; when stdout is a pipe (not a TTY) the banner above would be
    # lost unless it is flushed first.
    sys.stdout.flush()
    sys.stderr.flush()

    os.execvp(
        sys.executable,
        [
            sys.executable, "-m", "sglang.launch_server",
            "--model-path", "mistralai/Devstral-2-123B-Instruct-2512",
            "--host", host,
            "--port", port,
            "--tp", "8",
            "--tool-call-parser", "mistral",
        ],
    )
|
||||||
|
|
||||||
|
# Run the shim whenever this file is executed OR merely imported (some
# invocation paths just import the file). A successful main() never returns,
# because it exec()s the SGLang server, so a single unconditional call covers
# both the `python -m ...` case and the plain-import case.
main()
|
||||||
Reference in New Issue
Block a user