feat: add pip install lmcache for KV cache offloading
This commit is contained in:
@@ -8,3 +8,6 @@ COPY utils.py /usr/local/lib/python3.12/dist-packages/vllm/tool_parsers/utils.py
|
||||
# Patch hf renderer to force string content format for GLM models
|
||||
# This fixes the issue where tool response content is dropped
|
||||
# Overwrite the installed vLLM renderer module in place with the patched copy
# from the build context. The destination is tied to the Python 3.12
# dist-packages layout, so it must be updated if the base image's Python changes.
COPY ["vllm_patches/hf.py", "/usr/local/lib/python3.12/dist-packages/vllm/renderers/hf.py"]
|
||||
|
||||
# Install LMCache for KV cache offloading / sharing across nodes
|
||||
# LMCache is fetched from PyPI at build time. Pass
#   --build-arg LMCACHE_VERSION=<x.y.z>
# to pin an exact release for reproducible builds. With the default (empty)
# value the newest available release is installed, as before.
ARG LMCACHE_VERSION=""
# ${VAR:+...} expands to "==<version>" only when LMCACHE_VERSION is non-empty,
# so the requirement is either "lmcache" or "lmcache==<version>".
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir "lmcache${LMCACHE_VERSION:+==${LMCACHE_VERSION}}"
|
||||
|
||||
Reference in New Issue
Block a user