Updated for v0.10.0

2025-08-20 21:02:46 +00:00
parent 23267e4bf5
commit daf345024b
5 changed files with 23 additions and 13 deletions
--- a/lmcache/Dockerfile
+++ b/lmcache/Dockerfile
@@ -46,11 +46,11 @@ FROM base AS build-base
 # Install build tools and dependencies
 RUN uv pip install -U build cmake ninja pybind11 setuptools==79.0.1 wheel

-# Clone and build LMCache wheel
-RUN git clone https://github.com/rajesh-s/LMCache.git -b v0.3.0 && \
+RUN echo "Hello"
+# Clone LMCache v0.3.3, build its wheel, and copy it to /workspace
+RUN git clone https://github.com/LMCache/LMCache.git -b v0.3.3 && \
    cd LMCache && \
    uv pip install setuptools_scm && \
    python -m build --wheel --no-isolation && \
    cp dist/*.whl /workspace/
-
 CMD ["/bin/bash"]
--- a/lmcache/README
+++ b/lmcache/README
@@ -0,0 +1,5 @@
+sudo docker build -t lmcache .
+sudo docker run -it --rm lmcache /bin/bash
+
+sudo docker ps
+sudo docker cp <container-id>:/workspace/lmcache-0.3.3-cp312-cp312-linux_aarch64.whl .
--- a/vllm/Dockerfile
+++ b/vllm/Dockerfile
@@ -51,8 +51,7 @@ RUN apt-get update && apt install -y wget

 RUN uv pip install numpy==2.0.0
 # Install the pinned PyTorch cu128 build
-RUN uv pip install torch==2.7.0+cu128 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu128 --torch-backend=cu128
-# RUN wget https://download.pytorch.org/whl/nightly/cu128/torch-2.7.0.dev20250310%2Bcu128-cp312-cp312-linux_aarch64.whl -O torch-2.7.0.dev20250310+cu128-cp312-cp312-linux_aarch64.whl
+RUN uv pip install torch==2.7.1+cu128 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu128 --torch-backend=cu128

 # Install from the wheel
 # RUN uv pip install ./torch-2.7.0.dev20250310+cu128-cp312-cp312-linux_aarch64.whl
@@ -100,8 +99,14 @@ RUN cd xformers && \
 #     git submodule update --init --recursive -j 8 && \
 #     uv build --wheel --no-build-isolation -o /wheels

+RUN git clone https://github.com/flashinfer-ai/flashinfer.git --recursive && \
+    cd flashinfer && git checkout v0.2.8rc1 && \
+    uv pip install ninja && \
+    uv pip install --no-build-isolation --verbose .
+
+
 FROM build-base AS build-vllm
-ARG VLLM_REF=v0.9.1
+ARG VLLM_REF=v0.10.0
 RUN git clone https://github.com/vllm-project/vllm.git
 RUN cd vllm && \
    git checkout ${VLLM_REF} && \
@@ -109,7 +114,7 @@ RUN cd vllm && \
    git submodule update --init --recursive -j 8 && \
    python use_existing_torch.py && \
    uv pip install -r requirements/build.txt && \
-    MAX_JOBS=32 uv build --wheel --no-build-isolation -o /wheels
+    MAX_JOBS=16 uv build --wheel --no-build-isolation -o /wheels


 FROM base AS vllm-openai
@@ -139,8 +144,8 @@ RUN uv pip install -U build cmake ninja pybind11 setuptools==79.0.1 wheel

 # Clone and build LMCache wheel without Infinistore that is broken on aarch64
 # Copy the wheel from host to container
-COPY lmcache-0.3.1.dev1-cp312-cp312-linux_aarch64.whl /tmp/
-RUN uv pip install /tmp/lmcache-0.3.1.dev1-cp312-cp312-linux_aarch64.whl --no-deps
+COPY lmcache-0.3.3-cp312-cp312-linux_aarch64.whl /tmp/
+RUN uv pip install /tmp/lmcache-0.3.3-cp312-cp312-linux_aarch64.whl --no-deps

 # Enable hf-transfer
 ENV HF_HUB_ENABLE_HF_TRANSFER=1
@@ -157,10 +162,7 @@ RUN apt install -y --no-install-recommends tmux cmake
 # Install required build tool
 RUN uv pip install ninja

-RUN git clone https://github.com/flashinfer-ai/flashinfer.git --recursive && \
-    cd flashinfer && \
-    uv pip install ninja && \
-    uv pip install --no-build-isolation --verbose .
+
 # API server entrypoint
 # ENTRYPOINT ["vllm", "serve"]
 CMD ["/bin/bash"]
--- a/vllm/README.md
+++ b/vllm/README.md
@@ -6,6 +6,9 @@ Hosted [here](https://hub.docker.com/repository/docker/rajesh550/gh200-vllm)
 docker login
 docker buildx build --platform linux/arm64 --memory=600g -t rajesh550/gh200-vllm:0.9.0.1 .

+# Alternative
+sudo docker build --memory=300g --platform linux/arm64 -t rajesh550/gh200-vllm:0.10.1 .
+
 docker build --memory=300g -t rajesh550/gh200-vllm:0.9.0.1 .
 docker push rajesh550/gh200-vllm:0.9.0.1
 ```
--- a/vllm/lmcache-0.3.3-cp312-cp312-linux_aarch64.whl
+++ b/vllm/lmcache-0.3.3-cp312-cp312-linux_aarch64.whl