# vllm-with-lmcache/Dockerfile

# Base image. It is overridable so a build can pin an immutable reference, e.g.
#   docker build --build-arg BASE_IMAGE=vllm/vllm-openai-rocm@sha256:<digest> .
# The default `nightly` tag is mutable: rebuilding at a later date may pull a
# different vLLM build than the one this file was last tested against.
ARG BASE_IMAGE=vllm/vllm-openai-rocm:nightly
FROM ${BASE_IMAGE}

# NOTE(review): presumably caps the number of parallel compile jobs used when
# building native extensions (such as the LMCache build in this file) - confirm.
# Because this is ENV rather than ARG, the value also persists into the
# runtime environment of containers started from this image.
ENV MAX_JOBS=2

# LMCache for KV cache offloading / sharing across nodes.
# Built from source with HIP extensions enabled for MI300X (gfx942).
#
# LMCACHE_REPO / LMCACHE_REF select what gets built. The default ref is a
# branch name, so its content can move between builds; pass a commit SHA
# (--build-arg LMCACHE_REF=<sha>) to get a reproducible image.
ARG LMCACHE_REPO=https://github.com/Byteflux/LMCache.git
ARG LMCACHE_REF=mla-multi-group-kv-cache-with-redis
# Bump this value to force the layer below to rebuild, e.g. to pick up new
# commits on the branch without editing the RUN instruction itself.
ARG CACHE_BUSTER=1
# apt metadata and the cloned source tree are removed in the same layer that
# creates them, so neither ends up in the final image. ca-certificates is
# listed explicitly because --no-install-recommends no longer pulls it in
# through git, and the HTTPS clone needs it.
RUN echo "Building LMCache ref ${LMCACHE_REF} (cache buster ${CACHE_BUSTER})" && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        git && \
    rm -rf /var/lib/apt/lists/* && \
    git clone "${LMCACHE_REPO}" /tmp/lmcache && \
    cd /tmp/lmcache && \
    git checkout "${LMCACHE_REF}" && \
    pip install --no-cache-dir -r requirements/build.txt && \
    BUILD_WITH_HIP=1 \
    CXX=hipcc \
    PYTORCH_ROCM_ARCH="gfx942" \
    pip install --no-cache-dir --no-build-isolation . --verbose && \
    rm -rf /tmp/lmcache

# Location of the installed vllm package inside the base image. The path is
# tied to the image's Python version (3.12 here); override it with
# --build-arg VLLM_PKG_DIR=... if the base image moves to another interpreter.
ARG VLLM_PKG_DIR=/usr/local/lib/python3.12/dist-packages/vllm

# Nemotron reasoning parser (placed under /opt, outside the vllm package tree)
COPY super_v3_reasoning_parser.py /opt/super_v3_reasoning_parser.py

# DeepSeek tool call parser with MTP fixes
# (written over the vllm package's tool_parsers/ path of the same name)
COPY deepseekv32_tool_parser.py ${VLLM_PKG_DIR}/tool_parsers/deepseekv32_tool_parser.py

# MiniMax tool call parser with kwargs fixes
# (one file goes under tool_parsers/, the other under parser/)
COPY minimax_tool_parser.py ${VLLM_PKG_DIR}/tool_parsers/minimax_tool_parser.py
COPY minimax_m2_parser.py ${VLLM_PKG_DIR}/parser/minimax_m2_parser.py