Updated for v0.11.0
This commit is contained in:
@@ -6,7 +6,9 @@ This repository provides a Dockerfile to build a container with vLLM and all its
|
||||
|
||||
If you prefer not to build the image yourself, you can pull the ready-to-use image directly from Docker Hub:
|
||||
|
||||
`docker pull rajesh550/gh200-vllm:0.11.0`
|
||||
```bash
|
||||
docker run --rm -it --gpus all -v "$PWD":"$PWD" -w "$PWD" rajesh550/gh200-vllm:0.11.0 bash
|
||||
```
|
||||
|
||||
👉 [Docker Hub](https://hub.docker.com/repository/docker/rajesh550/gh200-vllm/general)
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-devel-${IMAGE_DISTRO} AS base
|
||||
# 'a' suffix is not forward compatible but enables all optimizations
|
||||
ARG TORCH_CUDA_ARCH_LIST="9.0a"
|
||||
ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}
|
||||
ENV UV_TORCH_BACKEND=cu128
|
||||
ENV UV_TORCH_BACKEND=cu129
|
||||
ARG VLLM_FA_CMAKE_GPU_ARCHES="90a-real"
|
||||
ENV VLLM_FA_CMAKE_GPU_ARCHES=${VLLM_FA_CMAKE_GPU_ARCHES}
|
||||
|
||||
@@ -73,7 +73,7 @@ RUN cd triton && \
|
||||
|
||||
RUN export MAX_JOBS=6
|
||||
FROM build-base AS build-xformers
|
||||
ARG XFORMERS_REF=v0.0.32
|
||||
ARG XFORMERS_REF=v0.0.32.post2
|
||||
ARG XFORMERS_BUILD_VERSION=0.0.30+cu129
|
||||
ENV BUILD_VERSION=${XFORMERS_BUILD_VERSION:-${XFORMERS_REF#v}}
|
||||
RUN git clone https://github.com/facebookresearch/xformers.git
|
||||
@@ -86,7 +86,7 @@ RUN cd xformers && \
|
||||
# Currently not supported on CUDA 12.8
|
||||
FROM build-base AS build-flashinfer
|
||||
ARG FLASHINFER_ENABLE_AOT=1
|
||||
ARG FLASHINFER_REF=v0.3.1
|
||||
ARG FLASHINFER_REF=v0.4.1
|
||||
ARG FLASHINFER_BUILD_SUFFIX=cu129
|
||||
ENV FLASHINFER_LOCAL_VERSION=${FLASHINFER_BUILD_SUFFIX:-}
|
||||
RUN git clone https://github.com/flashinfer-ai/flashinfer.git
|
||||
@@ -97,7 +97,7 @@ RUN cd flashinfer && \
|
||||
uv build --wheel --no-build-isolation -o /wheels
|
||||
|
||||
FROM build-base AS build-lmcache
|
||||
ARG LMCACHE_REF=v0.3.3
|
||||
ARG LMCACHE_REF=v0.3.7
|
||||
RUN git clone https://github.com/LMCache/LMCache.git -b ${LMCACHE_REF} && \
|
||||
cd LMCache && \
|
||||
uv pip install setuptools_scm && \
|
||||
@@ -105,7 +105,7 @@ RUN git clone https://github.com/LMCache/LMCache.git -b ${LMCACHE_REF} && \
|
||||
cp dist/*.whl /wheels/
|
||||
|
||||
FROM build-base AS build-vllm
|
||||
ARG VLLM_REF=v0.10.2
|
||||
ARG VLLM_REF=v0.11.0
|
||||
RUN git clone https://github.com/vllm-project/vllm.git
|
||||
RUN cd vllm && \
|
||||
git checkout ${VLLM_REF} && \
|
||||
@@ -137,7 +137,7 @@ RUN git clone https://github.com/bytedance/InfiniStore && \
|
||||
cp dist/*.whl /wheels/
|
||||
|
||||
FROM base AS vllm-openai
|
||||
# COPY --from=build-flashinfer /wheels/* wheels/
|
||||
COPY --from=build-flashinfer /wheels/* wheels/
|
||||
COPY --from=build-triton /wheels/* wheels/
|
||||
COPY --from=build-vllm /wheels/* wheels/
|
||||
COPY --from=build-xformers /wheels/* wheels/
|
||||
|
||||
@@ -3,9 +3,9 @@
|
||||
Hosted [here](https://hub.docker.com/repository/docker/rajesh550/gh200-vllm)
|
||||
|
||||
```bash
|
||||
sudo docker login
|
||||
docker login
|
||||
# Alternative
|
||||
# docker buildx build --platform linux/arm64 --memory=600g -t rajesh550/gh200-vllm:0.9.0.1 .
|
||||
sudo docker build --memory=300g --platform linux/arm64 -t rajesh550/gh200-vllm:0.10.2 .
|
||||
sudo docker push rajesh550/gh200-vllm:0.10.2
|
||||
docker build --memory=300g --platform linux/arm64 -t rajesh550/gh200-vllm:0.11.0 .
|
||||
docker push rajesh550/gh200-vllm:0.11.0
|
||||
```
|
||||
Reference in New Issue
Block a user