Updated for v0.11.0

This commit is contained in:
Rajesh Shashi Kumar
2025-10-16 01:08:21 +00:00
parent 31f4489d1f
commit 02430037ea
3 changed files with 12 additions and 10 deletions

View File

@@ -6,7 +6,9 @@ This repository provides a Dockerfile to build a container with vLLM and all its
If you prefer not to build the image yourself, you can pull the ready-to-use image directly from Docker Hub:
`docker pull rajesh550/gh200-vllm:0.11.0`
```bash
docker run --rm -it --gpus all -v "$PWD":"$PWD" -w "$PWD" rajesh550/gh200-vllm:0.11.0 bash
```
👉 [Docker Hub](https://hub.docker.com/repository/docker/rajesh550/gh200-vllm/general)

View File

@@ -9,7 +9,7 @@ FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-devel-${IMAGE_DISTRO} AS base
# 'a' suffix is not forward compatible but enables all optimizations
ARG TORCH_CUDA_ARCH_LIST="9.0a"
ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}
ENV UV_TORCH_BACKEND=cu128
ENV UV_TORCH_BACKEND=cu129
ARG VLLM_FA_CMAKE_GPU_ARCHES="90a-real"
ENV VLLM_FA_CMAKE_GPU_ARCHES=${VLLM_FA_CMAKE_GPU_ARCHES}
@@ -73,7 +73,7 @@ RUN cd triton && \
RUN export MAX_JOBS=6
FROM build-base AS build-xformers
ARG XFORMERS_REF=v0.0.32
ARG XFORMERS_REF=v0.0.32.post2
ARG XFORMERS_BUILD_VERSION=0.0.30+cu129
ENV BUILD_VERSION=${XFORMERS_BUILD_VERSION:-${XFORMERS_REF#v}}
RUN git clone https://github.com/facebookresearch/xformers.git
@@ -86,7 +86,7 @@ RUN cd xformers && \
# Currently not supported on CUDA 12.8
FROM build-base AS build-flashinfer
ARG FLASHINFER_ENABLE_AOT=1
ARG FLASHINFER_REF=v0.3.1
ARG FLASHINFER_REF=v0.4.1
ARG FLASHINFER_BUILD_SUFFIX=cu129
ENV FLASHINFER_LOCAL_VERSION=${FLASHINFER_BUILD_SUFFIX:-}
RUN git clone https://github.com/flashinfer-ai/flashinfer.git
@@ -97,7 +97,7 @@ RUN cd flashinfer && \
uv build --wheel --no-build-isolation -o /wheels
FROM build-base AS build-lmcache
ARG LMCACHE_REF=v0.3.3
ARG LMCACHE_REF=v0.3.7
RUN git clone https://github.com/LMCache/LMCache.git -b ${LMCACHE_REF} && \
cd LMCache && \
uv pip install setuptools_scm && \
@@ -105,7 +105,7 @@ RUN git clone https://github.com/LMCache/LMCache.git -b ${LMCACHE_REF} && \
cp dist/*.whl /wheels/
FROM build-base AS build-vllm
ARG VLLM_REF=v0.10.2
ARG VLLM_REF=v0.11.0
RUN git clone https://github.com/vllm-project/vllm.git
RUN cd vllm && \
git checkout ${VLLM_REF} && \
@@ -137,7 +137,7 @@ RUN git clone https://github.com/bytedance/InfiniStore && \
cp dist/*.whl /wheels/
FROM base AS vllm-openai
# COPY --from=build-flashinfer /wheels/* wheels/
COPY --from=build-flashinfer /wheels/* wheels/
COPY --from=build-triton /wheels/* wheels/
COPY --from=build-vllm /wheels/* wheels/
COPY --from=build-xformers /wheels/* wheels/

View File

@@ -3,9 +3,9 @@
Hosted [here](https://hub.docker.com/repository/docker/rajesh550/gh200-vllm)
```bash
sudo docker login
docker login
# Alternative
# docker buildx build --platform linux/arm64 --memory=600g -t rajesh550/gh200-vllm:0.9.0.1 .
sudo docker build --memory=300g --platform linux/arm64 -t rajesh550/gh200-vllm:0.10.2 .
sudo docker push rajesh550/gh200-vllm:0.10.2
docker build --memory=300g --platform linux/arm64 -t rajesh550/gh200-vllm:0.11.0 .
docker push rajesh550/gh200-vllm:0.11.0
```