From f8a9d372e5f34e71b81382b31fa561d6009fd975 Mon Sep 17 00:00:00 2001
From: biondizzle <biondizzle@gmail.com>
Date: Fri, 3 Apr 2026 00:05:56 +0000
Subject: [PATCH] Use PyPI vLLM wheel instead of building (QEMU cmake
 try_compile fails)

- vLLM 0.18.1 aarch64 wheel includes pre-compiled FA2, FA3, MoE kernels
- Original build-from-source code commented out for GH200 restoration
- CMake compiler ABI detection fails under QEMU emulation
---
 vllm/Dockerfile | 41 +++++++++++++++++++++++++++--------------
 1 file changed, 27 insertions(+), 14 deletions(-)

diff --git a/vllm/Dockerfile b/vllm/Dockerfile
index 6cf4a54..4592cdc 100644
--- a/vllm/Dockerfile
+++ b/vllm/Dockerfile
@@ -120,21 +120,34 @@ RUN apt-get update && apt-get install -y build-essential cmake gcc && \
     pip wheel . -v --no-deps --no-build-isolation -w ./wheels/ && \
     cp wheels/*.whl /wheels/
 
+# ==============================================================================
+# NOTE: Using PyPI vLLM wheel instead of building from source
+# Reason: QEMU cmake try_compile fails during compiler ABI detection
+# PyPI wheel v0.18.1 includes pre-compiled FA2, FA3, MoE kernels for aarch64
+# To restore the native build on GH200, uncomment the block below (note: it pins
+# VLLM_REF=v0.11.1rc2, not 0.18.1) and comment out the PyPI download section.
+# ==============================================================================
+# FROM build-base AS build-vllm
+# ARG VLLM_REF=v0.11.1rc2
+# # Install ccache for faster compilation
+# RUN apt-get update && apt-get install -y ccache
+# RUN git clone https://github.com/vllm-project/vllm.git
+# RUN cd vllm && \
+#     git checkout ${VLLM_REF} && \
+#     git submodule sync && \
+#     git submodule update --init --recursive -j 8 && \
+#     sed -i 's/GIT_TAG [a-f0-9]\{40\}/GIT_TAG main/' cmake/external_projects/vllm_flash_attn.cmake && \
+#     export MAX_JOBS=4 && \
+#     export CMAKE_BUILD_PARALLEL_LEVEL=$MAX_JOBS && \
+#     python use_existing_torch.py && \
+#     uv pip install -r requirements/build.txt && \
+#     CCACHE_NOHASHDIR="true" uv build --wheel --no-build-isolation -o /wheels
+
+# Use PyPI vLLM wheel (QEMU cmake fails during try_compile)
 FROM build-base AS build-vllm
-ARG VLLM_REF=v0.11.1rc2
-# Install ccache for faster compilation
-RUN apt-get update && apt-get install -y ccache
-RUN git clone https://github.com/vllm-project/vllm.git
-RUN cd vllm && \
-    git checkout ${VLLM_REF} && \
-    git submodule sync && \
-    git submodule update --init --recursive -j 8 && \
-    sed -i 's/GIT_TAG [a-f0-9]\{40\}/GIT_TAG main/' cmake/external_projects/vllm_flash_attn.cmake && \
-    export MAX_JOBS=4 && \
-    export CMAKE_BUILD_PARALLEL_LEVEL=$MAX_JOBS && \
-    python use_existing_torch.py && \
-    uv pip install -r requirements/build.txt && \
-    CCACHE_NOHASHDIR="true" uv build --wheel --no-build-isolation -o /wheels
+# Fetch only the prebuilt aarch64 wheel into /wheels (no sdists, no deps, no pip cache).
+ARG VLLM_VERSION=0.18.1
+RUN mkdir -p /wheels && pip download --no-cache-dir --no-deps --only-binary=:all: --platform manylinux_2_31_aarch64 -d /wheels "vllm==${VLLM_VERSION}"
 
 # Build infinistore after vllm to avoid cache invalidation
 FROM build-base AS build-infinistore