feat: add pip install lmcache for KV cache offloading
This commit is contained in:
@@ -8,3 +8,6 @@ COPY utils.py /usr/local/lib/python3.12/dist-packages/vllm/tool_parsers/utils.py
|
|||||||
# Patch hf renderer to force string content format for GLM models
|
# Patch hf renderer to force string content format for GLM models
|
||||||
# This fixes the issue where tool response content is dropped
|
# This fixes the issue where tool response content is dropped
|
||||||
COPY vllm_patches/hf.py /usr/local/lib/python3.12/dist-packages/vllm/renderers/hf.py
|
COPY vllm_patches/hf.py /usr/local/lib/python3.12/dist-packages/vllm/renderers/hf.py
|
||||||
|
|
||||||
|
# Install LMCache for KV cache offloading / sharing across nodes
|
||||||
|
# LMCACHE_VERSION optionally pins the LMCache release, e.g.
#   docker build --build-arg LMCACHE_VERSION=<x.y.z> .
# When left empty (the default) pip resolves the newest release at build time,
# which keeps the previous behaviour but makes the image non-reproducible;
# set it for production builds.
# NOTE(review): this step runs after the vllm patch COPY steps above — confirm
# that resolving lmcache's dependencies does not reinstall vllm (or otherwise
# replace files under dist-packages/vllm) and silently drop those patches.
ARG LMCACHE_VERSION=""
# ${VAR:+...} expands to "==<version>" only when LMCACHE_VERSION is non-empty.
RUN pip install --no-cache-dir "lmcache${LMCACHE_VERSION:+==${LMCACHE_VERSION}}"
|
||||||
|
|||||||
Reference in New Issue
Block a user