From f8a9d372e5f34e71b81382b31fa561d6009fd975 Mon Sep 17 00:00:00 2001
From: biondizzle
Date: Fri, 3 Apr 2026 00:05:56 +0000
Subject: [PATCH] Use PyPI vLLM wheel instead of building (QEMU cmake try_compile fails)

- vLLM 0.18.1 aarch64 wheel includes pre-compiled FA2, FA3, MoE kernels
- Original build-from-source code commented out for GH200 restoration
- CMake compiler ABI detection fails under QEMU emulation
---
Reviewer notes (text placed here is skipped by git am and does not become
part of the commit message):
  * The single hunk below rewrites the build-vllm stage: the 14-line source
    build is removed, preserved as comments above the stage, and replaced by
    a pip download of vllm==${VLLM_VERSION} (default 0.18.1) into /wheels.
  * NOTE(review): line breaks, blank lines and continuation-line indentation
    were re-created from a whitespace-collapsed copy of this patch; the
    4-space indent is presumed. If the hunk does not apply cleanly, retry
    with "git apply --ignore-whitespace" and confirm against vllm/Dockerfile.

 vllm/Dockerfile | 41 +++++++++++++++++++++++++++--------------
 1 file changed, 27 insertions(+), 14 deletions(-)

diff --git a/vllm/Dockerfile b/vllm/Dockerfile
index 6cf4a54..4592cdc 100644
--- a/vllm/Dockerfile
+++ b/vllm/Dockerfile
@@ -120,21 +120,34 @@ RUN apt-get update && apt-get install -y build-essential cmake gcc && \
     pip wheel . -v --no-deps --no-build-isolation -w ./wheels/ && \
     cp wheels/*.whl /wheels/
 
+# ==============================================================================
+# NOTE: Using PyPI vLLM wheel instead of building from source
+# Reason: QEMU cmake try_compile fails during compiler ABI detection
+# PyPI wheel v0.18.1 includes pre-compiled FA2, FA3, MoE kernels for aarch64
+# To restore native build on GH200, uncomment the block below and comment out
+# the PyPI download section.
+# ==============================================================================
+# FROM build-base AS build-vllm
+# ARG VLLM_REF=v0.11.1rc2
+# # Install ccache for faster compilation
+# RUN apt-get update && apt-get install -y ccache
+# RUN git clone https://github.com/vllm-project/vllm.git
+# RUN cd vllm && \
+#     git checkout ${VLLM_REF} && \
+#     git submodule sync && \
+#     git submodule update --init --recursive -j 8 && \
+#     sed -i 's/GIT_TAG [a-f0-9]\{40\}/GIT_TAG main/' cmake/external_projects/vllm_flash_attn.cmake && \
+#     export MAX_JOBS=4 && \
+#     export CMAKE_BUILD_PARALLEL_LEVEL=$MAX_JOBS && \
+#     python use_existing_torch.py && \
+#     uv pip install -r requirements/build.txt && \
+#     CCACHE_NOHASHDIR="true" uv build --wheel --no-build-isolation -o /wheels
+
+# Use PyPI vLLM wheel (QEMU cmake fails during try_compile)
 FROM build-base AS build-vllm
-ARG VLLM_REF=v0.11.1rc2
-# Install ccache for faster compilation
-RUN apt-get update && apt-get install -y ccache
-RUN git clone https://github.com/vllm-project/vllm.git
-RUN cd vllm && \
-    git checkout ${VLLM_REF} && \
-    git submodule sync && \
-    git submodule update --init --recursive -j 8 && \
-    sed -i 's/GIT_TAG [a-f0-9]\{40\}/GIT_TAG main/' cmake/external_projects/vllm_flash_attn.cmake && \
-    export MAX_JOBS=4 && \
-    export CMAKE_BUILD_PARALLEL_LEVEL=$MAX_JOBS && \
-    python use_existing_torch.py && \
-    uv pip install -r requirements/build.txt && \
-    CCACHE_NOHASHDIR="true" uv build --wheel --no-build-isolation -o /wheels
+ARG VLLM_VERSION=0.18.1
+RUN mkdir -p /wheels && \
+    pip download vllm==${VLLM_VERSION} --platform manylinux_2_31_aarch64 --only-binary=:all: --no-deps -d /wheels
 
 # Build infinistore after vllm to avoid cache invalidation
 FROM build-base AS build-infinistore