# grace-gpu-containers/lmcache/Dockerfile

# ---------- Builder Base ----------
# Base image: NVIDIA NGC PyTorch container, release 26.03. Noted contents:
#   * PyTorch 2.11.0a0 (bleeding edge)
#   * CUDA 13.2.0
#   * cuDNN 9.20, NCCL 2.29.7, TensorRT 10.16, TransformerEngine 2.13
#   * Multi-arch: x86 + ARM SBSA (GH200 support)
FROM nvcr.io/nvidia/pytorch:26.03-py3 AS base

# CUDA architecture list used when compiling extensions. Defaults to "9.0a";
# override at build time with --build-arg TORCH_CUDA_ARCH_LIST=...
ARG TORCH_CUDA_ARCH_LIST="9.0a"

# Export the arch list to every later RUN step and to the running container,
# and keep apt from prompting for input.
ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST} \
    DEBIAN_FRONTEND=noninteractive

# Install OS-level dependencies: curl (used to fetch the uv installer), git
# (used to clone LMCache), and the libibverbs / zlib development headers.
# apt-get is used rather than apt because apt's command-line interface is not
# guaranteed stable for scripted use and emits a warning when not on a TTY.
# Package lists and the archive cache are removed in the same layer so they
# do not end up in the image.
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install -y --no-install-recommends \
        curl \
        git \
        libibverbs-dev \
        zlib1g-dev && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* /var/cache/apt/archives

# Point C / C++ builds at the system GCC toolchain.
ENV CC=/usr/bin/gcc \
    CXX=/usr/bin/g++

# Install uv into /usr/local/bin.
# The installer is downloaded to a file and then executed instead of being
# piped straight into sh: with a pipe and no pipefail, a failed download hands
# sh an empty script, sh exits 0, and the build continues without uv.
RUN curl -LsSf https://astral.sh/uv/install.sh -o /tmp/uv-install.sh && \
    env UV_INSTALL_DIR=/usr/local/bin sh /tmp/uv-install.sh && \
    rm -f /tmp/uv-install.sh

# Set up workspace
WORKDIR /workspace

# Environment setup (PyTorch container already has CUDA paths set)
ENV CUDA_HOME=/usr/local/cuda
# Kept as its own ENV instruction so that ${CUDA_HOME} resolves to the value
# set just above. The ":+" form appends the previous LD_LIBRARY_PATH only when
# it is non-empty, which avoids a trailing ":" (an empty entry makes the
# dynamic loader search the current directory).
ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}

FROM base AS build-base

# Install the Python build tooling needed for the wheel build (run later with
# --no-isolation, so these must already be present in the environment).
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -U build cmake ninja pybind11 "setuptools>=77.0.3,<81.0.0"

# Clone LMCache, check out the requested ref, and build its wheel into
# /workspace. No patch is applied in this step.
#
# LMCACHE_REPO / LMCACHE_REF default to the upstream repository and its `dev`
# branch; pass --build-arg LMCACHE_REF=<tag-or-commit> for a reproducible build.
# The clone is a full (non-shallow) one.
# NOTE(review): presumably setuptools_scm needs the git history/tags to derive
# the wheel version — confirm before switching to a shallow clone.
ARG LMCACHE_REPO=https://github.com/LMCache/LMCache.git
ARG LMCACHE_REF=dev
# printf is used for the banner because whether echo interprets "\n" depends
# on which shell /bin/sh is.
RUN git clone "${LMCACHE_REPO}" LMCache && \
    cd LMCache && \
    git checkout "${LMCACHE_REF}" && \
    printf '\n\n========================================\n' && \
    printf '>>> BUILDING LMCACHE FROM:\n' && \
    printf '>>> BRANCH: %s\n' "$(git rev-parse --abbrev-ref HEAD)" && \
    printf '>>> COMMIT: %s\n' "$(git rev-parse HEAD)" && \
    printf '>>> DATE:   %s\n' "$(git log -1 --format=%cd --date=short)" && \
    printf '========================================\n\n\n' && \
    pip install --no-cache-dir setuptools_scm && \
    python -m build --wheel --no-isolation && \
    cp dist/*.whl /workspace/

# Default to a shell when the image is run directly.
CMD ["/bin/bash"]