From 9d88d4c7d84cb1416cc87120abb4d57fd0103b93 Mon Sep 17 00:00:00 2001
From: biondizzle
Date: Fri, 3 Apr 2026 05:50:02 +0000
Subject: [PATCH] Skip xformers - vLLM has built-in FlashAttention kernels

xformers requires TORCH_STABLE_ONLY which needs torch/csrc/stable/
headers not present in PyTorch 2.9.0. vLLM 0.18.1 includes its own
FA2/FA3 kernels.
---
 vllm/Dockerfile | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/vllm/Dockerfile b/vllm/Dockerfile
index 469333c..e455169 100644
--- a/vllm/Dockerfile
+++ b/vllm/Dockerfile
@@ -69,15 +69,14 @@ FROM build-base AS build-triton
 RUN mkdir -p /wheels && \
     pip download triton==3.6.0 --platform manylinux_2_27_aarch64 --only-binary=:all: --no-deps -d /wheels
 
-FROM build-base AS build-xformers
-#ARG XFORMERS_REF=v0.0.32.post2
-#ARG XFORMERS_BUILD_VERSION=0.0.30+cu130
-#ENV BUILD_VERSION=${XFORMERS_BUILD_VERSION:-${XFORMERS_REF#v}}
-RUN git clone https://github.com/facebookresearch/xformers.git
-RUN cd xformers && \
-    git submodule sync && \
-    git submodule update --init --recursive -j 8 && \
-    MAX_JOBS=8 uv build --wheel --no-build-isolation -o /wheels
+# Skip xformers - vLLM has built-in FlashAttention kernels
+# xformers requires TORCH_STABLE_ONLY which needs PyTorch headers not in 2.9.0
+# FROM build-base AS build-xformers
+# RUN git clone https://github.com/facebookresearch/xformers.git
+# RUN cd xformers && \
+#     git submodule sync && \
+#     git submodule update --init --recursive -j 8 && \
+#     MAX_JOBS=8 uv build --wheel --no-build-isolation -o /wheels
 
 FROM build-base AS build-flashinfer
 ARG FLASHINFER_ENABLE_AOT=1
@@ -173,7 +172,6 @@
 COPY --from=build-flash-attention /wheels/* wheels/
 COPY --from=build-flashinfer /wheels/* wheels/
 COPY --from=build-triton /wheels/* wheels/
 COPY --from=build-vllm /wheels/* wheels/
-COPY --from=build-xformers /wheels/* wheels/
 COPY --from=build-lmcache /wheels/* wheels/
 COPY --from=build-infinistore /wheels/* wheels/