Add MAX_JOBS=2 for LMCache, restore vLLM build from source

- LMCache: reduced parallelism to avoid memory pressure
- vLLM: restored build from source (was using PyPI wheel)
- Will test with docker --memory=24g limit
This commit is contained in:
2026-04-03 02:49:43 +00:00
parent f8a9d372e5
commit a399fbc8c6

View File

@@ -100,7 +100,7 @@ RUN git clone https://github.com/LMCache/LMCache.git -b ${LMCACHE_REF} && \
     # PyTorch version is dated in LMCache
     sed -i '/torch/d' pyproject.toml && \
     uv pip install setuptools_scm && \
-    python -m build --wheel --no-isolation && \
+    MAX_JOBS=2 python -m build --wheel --no-isolation && \
     cp dist/*.whl /wheels/
@@ -121,33 +121,31 @@ RUN apt-get update && apt-get install -y build-essential cmake gcc && \
     cp wheels/*.whl /wheels/
 # ==============================================================================
-# NOTE: Using PyPI vLLM wheel instead of building from source
-# Reason: QEMU cmake try_compile fails during compiler ABI detection
-# PyPI wheel v0.18.1 includes pre-compiled FA2, FA3, MoE kernels for aarch64
+# NOTE: Temporarily using PyPI vLLM wheel for QEMU testing
 # To restore native build on GH200, uncomment the block below and comment out
 # the PyPI download section.
 # ==============================================================================
-# FROM build-base AS build-vllm
-# ARG VLLM_REF=v0.11.1rc2
-# # Install ccache for faster compilation
-# RUN apt-get update && apt-get install -y ccache
-# RUN git clone https://github.com/vllm-project/vllm.git
-# RUN cd vllm && \
-#     git checkout ${VLLM_REF} && \
-#     git submodule sync && \
-#     git submodule update --init --recursive -j 8 && \
-#     sed -i 's/GIT_TAG [a-f0-9]\{40\}/GIT_TAG main/' cmake/external_projects/vllm_flash_attn.cmake && \
-#     export MAX_JOBS=4 && \
-#     export CMAKE_BUILD_PARALLEL_LEVEL=$MAX_JOBS && \
-#     python use_existing_torch.py && \
-#     uv pip install -r requirements/build.txt && \
-#     CCACHE_NOHASHDIR="true" uv build --wheel --no-build-isolation -o /wheels
+FROM build-base AS build-vllm
+ARG VLLM_REF=v0.18.1
+# Install ccache for faster compilation
+RUN apt-get update && apt-get install -y ccache
+RUN git clone https://github.com/vllm-project/vllm.git
+RUN cd vllm && \
+    git checkout ${VLLM_REF} && \
+    git submodule sync && \
+    git submodule update --init --recursive -j 8 && \
+    sed -i 's/GIT_TAG [a-f0-9]\{40\}/GIT_TAG main/' cmake/external_projects/vllm_flash_attn.cmake && \
+    export MAX_JOBS=4 && \
+    export CMAKE_BUILD_PARALLEL_LEVEL=$MAX_JOBS && \
+    python use_existing_torch.py && \
+    uv pip install -r requirements/build.txt && \
+    CCACHE_NOHASHDIR="true" uv build --wheel --no-build-isolation -o /wheels
 # Use PyPI vLLM wheel (QEMU cmake fails during try_compile)
-FROM build-base AS build-vllm
-ARG VLLM_VERSION=0.18.1
-RUN mkdir -p /wheels && \
-    pip download vllm==${VLLM_VERSION} --platform manylinux_2_31_aarch64 --only-binary=:all: --no-deps -d /wheels
+# FROM build-base AS build-vllm
+# ARG VLLM_VERSION=0.18.1
+# RUN mkdir -p /wheels && \
+#     pip download vllm==${VLLM_VERSION} --platform manylinux_2_31_aarch64 --only-binary=:all: --no-deps -d /wheels
 # Build infinistore after vllm to avoid cache invalidation
 FROM build-base AS build-infinistore