From 9d88d4c7d84cb1416cc87120abb4d57fd0103b93 Mon Sep 17 00:00:00 2001
From: biondizzle
Date: Fri, 3 Apr 2026 05:50:02 +0000
Subject: [PATCH] Skip xformers - vLLM has built-in FlashAttention kernels

xformers requires TORCH_STABLE_ONLY which needs torch/csrc/stable/
headers not present in PyTorch 2.9.0. vLLM 0.18.1 includes its own
FA2/FA3 kernels.
---
 vllm/Dockerfile | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/vllm/Dockerfile b/vllm/Dockerfile
index 469333c..e455169 100644
--- a/vllm/Dockerfile
+++ b/vllm/Dockerfile
@@ -69,15 +69,14 @@ FROM build-base AS build-triton
 RUN mkdir -p /wheels && \
     pip download triton==3.6.0 --platform manylinux_2_27_aarch64 --only-binary=:all: --no-deps -d /wheels
 
-FROM build-base AS build-xformers
-#ARG XFORMERS_REF=v0.0.32.post2
-#ARG XFORMERS_BUILD_VERSION=0.0.30+cu130
-#ENV BUILD_VERSION=${XFORMERS_BUILD_VERSION:-${XFORMERS_REF#v}}
-RUN git clone https://github.com/facebookresearch/xformers.git
-RUN cd xformers && \
-    git submodule sync && \
-    git submodule update --init --recursive -j 8 && \
-    MAX_JOBS=8 uv build --wheel --no-build-isolation -o /wheels
+# Skip xformers - vLLM has built-in FlashAttention kernels
+# xformers requires TORCH_STABLE_ONLY which needs PyTorch headers not in 2.9.0
+# FROM build-base AS build-xformers
+# RUN git clone https://github.com/facebookresearch/xformers.git
+# RUN cd xformers && \
+#     git submodule sync && \
+#     git submodule update --init --recursive -j 8 && \
+#     MAX_JOBS=8 uv build --wheel --no-build-isolation -o /wheels
 
 FROM build-base AS build-flashinfer
 ARG FLASHINFER_ENABLE_AOT=1
@@ -173,7 +172,6 @@
 COPY --from=build-flash-attention /wheels/* wheels/
 COPY --from=build-flashinfer /wheels/* wheels/
 COPY --from=build-triton /wheels/* wheels/
 COPY --from=build-vllm /wheels/* wheels/
-COPY --from=build-xformers /wheels/* wheels/
 COPY --from=build-lmcache /wheels/* wheels/
 COPY --from=build-infinistore /wheels/* wheels/