Use PyPI vLLM wheel instead of building (QEMU cmake try_compile fails)

- vLLM 0.18.1 aarch64 wheel includes pre-compiled FA2, FA3, MoE kernels
- Original build-from-source code commented out for GH200 restoration
- CMake compiler ABI detection fails under QEMU emulation
2026-04-03 00:05:56 +00:00
parent 436214bb72
commit f8a9d372e5
1 changed files with 27 additions and 14 deletions
--- a/vllm/Dockerfile
+++ b/vllm/Dockerfile
@@ -120,21 +120,34 @@ RUN apt-get update && apt-get install -y build-essential cmake gcc && \
    pip wheel . -v --no-deps --no-build-isolation -w ./wheels/ && \
    cp wheels/*.whl /wheels/

+# ==============================================================================
+# NOTE: Using PyPI vLLM wheel instead of building from source
+# Reason: QEMU cmake try_compile fails during compiler ABI detection
+# PyPI wheel v0.18.1 includes pre-compiled FA2, FA3, MoE kernels for aarch64
+# To restore the native build on GH200, uncomment the block below and comment out
+# the PyPI download section. The block pins VLLM_REF=v0.11.1rc2, not 0.18.1; bump it if needed.
+# ==============================================================================
+# FROM build-base AS build-vllm
+# ARG VLLM_REF=v0.11.1rc2
+# # Install ccache for faster compilation
+# RUN apt-get update && apt-get install -y ccache
+# RUN git clone https://github.com/vllm-project/vllm.git
+# RUN cd vllm && \
+#     git checkout ${VLLM_REF} && \
+#     git submodule sync && \
+#     git submodule update --init --recursive -j 8 && \
+#     sed -i 's/GIT_TAG [a-f0-9]\{40\}/GIT_TAG main/' cmake/external_projects/vllm_flash_attn.cmake && \
+#     export MAX_JOBS=4 && \
+#     export CMAKE_BUILD_PARALLEL_LEVEL=$MAX_JOBS && \
+#     python use_existing_torch.py && \
+#     uv pip install -r requirements/build.txt && \
+#     CCACHE_NOHASHDIR="true" uv build --wheel --no-build-isolation -o /wheels
+
+# Fetch the prebuilt vLLM aarch64 wheel from PyPI into /wheels instead of compiling (QEMU cmake fails during try_compile); --no-deps downloads only the vllm wheel and --only-binary=:all: rejects source distributions.
 FROM build-base AS build-vllm
-ARG VLLM_REF=v0.11.1rc2
-# Install ccache for faster compilation
-RUN apt-get update && apt-get install -y ccache
-RUN git clone https://github.com/vllm-project/vllm.git
-RUN cd vllm && \
-    git checkout ${VLLM_REF} && \
-    git submodule sync && \
-    git submodule update --init --recursive -j 8 && \
-    sed -i 's/GIT_TAG [a-f0-9]\{40\}/GIT_TAG main/' cmake/external_projects/vllm_flash_attn.cmake && \
-    export MAX_JOBS=4 && \
-    export CMAKE_BUILD_PARALLEL_LEVEL=$MAX_JOBS && \
-    python use_existing_torch.py && \
-    uv pip install -r requirements/build.txt && \
-    CCACHE_NOHASHDIR="true" uv build --wheel --no-build-isolation -o /wheels
+ARG VLLM_VERSION=0.18.1
+RUN mkdir -p /wheels && \
+    pip download vllm==${VLLM_VERSION} --platform manylinux_2_31_aarch64 --only-binary=:all: --no-deps -d /wheels

 # Build infinistore after vllm to avoid cache invalidation
 FROM build-base AS build-infinistore