From a399fbc8c6ea982e1e7cc4d7183b681a165ed84b Mon Sep 17 00:00:00 2001 From: biondizzle Date: Fri, 3 Apr 2026 02:49:43 +0000 Subject: [PATCH] Add MAX_JOBS=2 for LMCache, restore vLLM build from source - LMCache: reduced parallelism to avoid memory pressure - vLLM: restored build from source (was using PyPI wheel) - Will test with docker --memory=24g limit --- vllm/Dockerfile | 44 +++++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/vllm/Dockerfile b/vllm/Dockerfile index 4592cdc..0f825af 100644 --- a/vllm/Dockerfile +++ b/vllm/Dockerfile @@ -100,7 +100,7 @@ RUN git clone https://github.com/LMCache/LMCache.git -b ${LMCACHE_REF} && \ # PyTorch version is dated in LMCache sed -i '/torch/d' pyproject.toml && \ uv pip install setuptools_scm && \ - python -m build --wheel --no-isolation && \ + MAX_JOBS=2 python -m build --wheel --no-isolation && \ cp dist/*.whl /wheels/ @@ -121,33 +121,31 @@ RUN apt-get update && apt-get install -y build-essential cmake gcc && \ cp wheels/*.whl /wheels/ # ============================================================================== -# NOTE: Using PyPI vLLM wheel instead of building from source -# Reason: QEMU cmake try_compile fails during compiler ABI detection -# PyPI wheel v0.18.1 includes pre-compiled FA2, FA3, MoE kernels for aarch64 +# NOTE: vLLM is now built from source (restored below); the PyPI wheel fallback is commented out # To restore native build on GH200, uncomment the block below and comment out # the PyPI download section. 
# ============================================================================== -# FROM build-base AS build-vllm -# ARG VLLM_REF=v0.11.1rc2 -# # Install ccache for faster compilation -# RUN apt-get update && apt-get install -y ccache -# RUN git clone https://github.com/vllm-project/vllm.git -# RUN cd vllm && \ -# git checkout ${VLLM_REF} && \ -# git submodule sync && \ -# git submodule update --init --recursive -j 8 && \ -# sed -i 's/GIT_TAG [a-f0-9]\{40\}/GIT_TAG main/' cmake/external_projects/vllm_flash_attn.cmake && \ -# export MAX_JOBS=4 && \ -# export CMAKE_BUILD_PARALLEL_LEVEL=$MAX_JOBS && \ -# python use_existing_torch.py && \ -# uv pip install -r requirements/build.txt && \ -# CCACHE_NOHASHDIR="true" uv build --wheel --no-build-isolation -o /wheels +FROM build-base AS build-vllm +ARG VLLM_REF=v0.18.1 +# Install ccache for faster compilation +RUN apt-get update && apt-get install -y ccache +RUN git clone https://github.com/vllm-project/vllm.git +RUN cd vllm && \ + git checkout ${VLLM_REF} && \ + git submodule sync && \ + git submodule update --init --recursive -j 8 && \ + sed -i 's/GIT_TAG [a-f0-9]\{40\}/GIT_TAG main/' cmake/external_projects/vllm_flash_attn.cmake && \ + export MAX_JOBS=4 && \ + export CMAKE_BUILD_PARALLEL_LEVEL=$MAX_JOBS && \ + python use_existing_torch.py && \ + uv pip install -r requirements/build.txt && \ + CCACHE_NOHASHDIR="true" uv build --wheel --no-build-isolation -o /wheels # Use PyPI vLLM wheel (QEMU cmake fails during try_compile) -FROM build-base AS build-vllm -ARG VLLM_VERSION=0.18.1 -RUN mkdir -p /wheels && \ - pip download vllm==${VLLM_VERSION} --platform manylinux_2_31_aarch64 --only-binary=:all: --no-deps -d /wheels +# FROM build-base AS build-vllm +# ARG VLLM_VERSION=0.18.1 +# RUN mkdir -p /wheels && \ +# pip download vllm==${VLLM_VERSION} --platform manylinux_2_31_aarch64 --only-binary=:all: --no-deps -d /wheels # Build infinistore after vllm to avoid cache invalidation FROM build-base AS build-infinistore