diff --git a/Dockerfile b/Dockerfile
index 9b65dd5..8d9b8ea 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -8,3 +8,12 @@ COPY utils.py /usr/local/lib/python3.12/dist-packages/vllm/tool_parsers/utils.py
 # Patch hf renderer to force string content format for GLM models
 # This fixes the issue where tool response content is dropped
 COPY vllm_patches/hf.py /usr/local/lib/python3.12/dist-packages/vllm/renderers/hf.py
+
+# Install LMCache for KV cache offloading / sharing across nodes.
+# LMCACHE_VERSION is empty by default, which installs the latest release;
+# pass `--build-arg LMCACHE_VERSION=<x.y.z>` to pin an exact version so that
+# rebuilds are reproducible.
+# NOTE(review): confirm the chosen release is compatible with the vLLM build
+# already present in this image before pinning it in CI.
+ARG LMCACHE_VERSION=""
+RUN pip install --no-cache-dir "lmcache${LMCACHE_VERSION:+==${LMCACHE_VERSION}}"