feat: add pip install lmcache for KV cache offloading

This commit is contained in:
2026-04-15 04:43:05 +00:00
parent 139e617ed0
commit 0b70c975bd

View File

@@ -8,3 +8,6 @@ COPY utils.py /usr/local/lib/python3.12/dist-packages/vllm/tool_parsers/utils.py
# Overwrite vLLM's installed hf renderer with the patched copy from
# vllm_patches/. The patch forces the string content format for GLM models,
# which fixes the issue where tool response content is dropped.
COPY vllm_patches/hf.py /usr/local/lib/python3.12/dist-packages/vllm/renderers/hf.py
# Add LMCache to the image; it provides KV cache offloading and sharing
# across nodes. --no-cache-dir disables pip's cache for this install.
# NOTE(review): no version specifier is given, so each rebuild resolves
# whatever release is current -- consider pinning (e.g. lmcache==<version>).
RUN pip install \
        --no-cache-dir \
        lmcache