diff --git a/Dockerfile b/Dockerfile
index 72c1b7e..72d4389 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,20 +1,5 @@
-FROM vllm/vllm-openai:v0.19.0-cu130
+#FROM vllm/vllm-openai:v0.19.0-cu130
+FROM atl.vultrcr.com/vllm/vllm-with-lmcache:v0.19.0-cu130-mla-multi-group-kv-cache
 
 # Patch tool parser for GLM regex fix
 COPY deepseekv32_tool_parser.py /usr/local/lib/python3.12/dist-packages/vllm/tool_parsers/deepseekv32_tool_parser.py
-
-# Install LMCache for KV cache offloading / sharing across nodes
-# Build with system CUDA 13.0 for Blackwell (B200)
-RUN apt-get update && apt-get install -y git \
-    libcusolver-dev-13-0 \
-    libcusparse-dev-13-0 \
-    libcublas-dev-13-0 \
-    libcurand-dev-13-0 \
-    libcufft-dev-13-0 \
-    libnvjitlink-dev-13-0 && \
-    git clone --depth 1 https://github.com/LMCache/LMCache.git /tmp/lmcache && \
-    cd /tmp/lmcache && \
-    CUDA_HOME=/usr/local/cuda \
-    TORCH_CUDA_ARCH_LIST="10.0" \
-    pip install --no-cache-dir --no-build-isolation . && \
-    rm -rf /tmp/lmcache
\ No newline at end of file