diff --git a/Dockerfile b/Dockerfile
index 8d9b8ea..4f6f11c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -10,4 +10,11 @@ COPY utils.py /usr/local/lib/python3.12/dist-packages/vllm/tool_parsers/utils.py
 COPY vllm_patches/hf.py /usr/local/lib/python3.12/dist-packages/vllm/renderers/hf.py
 
 # Install LMCache for KV cache offloading / sharing across nodes
-RUN pip install --no-cache-dir lmcache
+# Build from source to match CUDA 13 (pip wheel is CUDA 12)
+# Install non-editable: the checkout is deleted at the end of this RUN, and an
+# editable (-e) install would only link back to /tmp/lmcache, leaving the
+# package unimportable once that directory is gone.
+RUN git clone --depth 1 https://github.com/LMCache/LMCache.git /tmp/lmcache && \
+    cd /tmp/lmcache && \
+    pip install --no-cache-dir --no-build-isolation . && \
+    rm -rf /tmp/lmcache