Use PyPI vLLM wheel instead of building (QEMU cmake try_compile fails)
- vLLM 0.18.1 aarch64 wheel includes pre-compiled FA2, FA3, and MoE kernels
- Original build-from-source code is commented out so the native build can be restored on GH200
- CMake compiler ABI detection fails under QEMU emulation
This commit is contained in:
@@ -120,21 +120,34 @@ RUN apt-get update && apt-get install -y build-essential cmake gcc && \
|
|||||||
pip wheel . -v --no-deps --no-build-isolation -w ./wheels/ && \
|
pip wheel . -v --no-deps --no-build-isolation -w ./wheels/ && \
|
||||||
cp wheels/*.whl /wheels/
|
cp wheels/*.whl /wheels/
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# NOTE: Using PyPI vLLM wheel instead of building from source
|
||||||
|
# Reason: QEMU cmake try_compile fails during compiler ABI detection
|
||||||
|
# PyPI wheel v0.18.1 includes pre-compiled FA2, FA3, MoE kernels for aarch64
|
||||||
|
# To restore native build on GH200, uncomment the block below and comment out
|
||||||
|
# the PyPI download section.
|
||||||
|
# ==============================================================================
|
||||||
|
# FROM build-base AS build-vllm
|
||||||
|
# ARG VLLM_REF=v0.11.1rc2
|
||||||
|
# # Install ccache for faster compilation
|
||||||
|
# RUN apt-get update && apt-get install -y ccache
|
||||||
|
# RUN git clone https://github.com/vllm-project/vllm.git
|
||||||
|
# RUN cd vllm && \
|
||||||
|
# git checkout ${VLLM_REF} && \
|
||||||
|
# git submodule sync && \
|
||||||
|
# git submodule update --init --recursive -j 8 && \
|
||||||
|
# sed -i 's/GIT_TAG [a-f0-9]\{40\}/GIT_TAG main/' cmake/external_projects/vllm_flash_attn.cmake && \
|
||||||
|
# export MAX_JOBS=4 && \
|
||||||
|
# export CMAKE_BUILD_PARALLEL_LEVEL=$MAX_JOBS && \
|
||||||
|
# python use_existing_torch.py && \
|
||||||
|
# uv pip install -r requirements/build.txt && \
|
||||||
|
# CCACHE_NOHASHDIR="true" uv build --wheel --no-build-isolation -o /wheels
|
||||||
|
|
||||||
|
# Use PyPI vLLM wheel (QEMU cmake fails during try_compile)
|
||||||
FROM build-base AS build-vllm
|
FROM build-base AS build-vllm
|
||||||
ARG VLLM_REF=v0.11.1rc2
|
ARG VLLM_VERSION=0.18.1
|
||||||
# Install ccache for faster compilation
|
RUN mkdir -p /wheels && \
|
||||||
RUN apt-get update && apt-get install -y ccache
|
pip download vllm==${VLLM_VERSION} --platform manylinux_2_31_aarch64 --only-binary=:all: --no-deps -d /wheels
|
||||||
RUN git clone https://github.com/vllm-project/vllm.git
|
|
||||||
RUN cd vllm && \
|
|
||||||
git checkout ${VLLM_REF} && \
|
|
||||||
git submodule sync && \
|
|
||||||
git submodule update --init --recursive -j 8 && \
|
|
||||||
sed -i 's/GIT_TAG [a-f0-9]\{40\}/GIT_TAG main/' cmake/external_projects/vllm_flash_attn.cmake && \
|
|
||||||
export MAX_JOBS=4 && \
|
|
||||||
export CMAKE_BUILD_PARALLEL_LEVEL=$MAX_JOBS && \
|
|
||||||
python use_existing_torch.py && \
|
|
||||||
uv pip install -r requirements/build.txt && \
|
|
||||||
CCACHE_NOHASHDIR="true" uv build --wheel --no-build-isolation -o /wheels
|
|
||||||
|
|
||||||
# Build infinistore after vllm to avoid cache invalidation
|
# Build infinistore after vllm to avoid cache invalidation
|
||||||
FROM build-base AS build-infinistore
|
FROM build-base AS build-infinistore
|
||||||
|
|||||||
Reference in New Issue
Block a user