feat: add pip install lmcache for KV cache offloading

2026-04-15 04:43:05 +00:00
parent 139e617ed0
commit 0b70c975bd
1 changed files with 3 additions and 0 deletions
--- a/3
+++ b/3
@@ -8,3 +8,6 @@ COPY utils.py /usr/local/lib/python3.12/dist-packages/vllm/tool_parsers/utils.py
 # Patch hf renderer to force string content format for GLM models
 # This fixes the issue where tool response content is dropped
 COPY vllm_patches/hf.py /usr/local/lib/python3.12/dist-packages/vllm/renderers/hf.py
+
+# Install LMCache (KV cache offloading / sharing across nodes). NOTE(review): unpinned -- pin a tested version for reproducible builds.
+RUN pip install --no-cache-dir lmcache