diff --git a/vllm/Dockerfile b/vllm/Dockerfile
index 8c5c0e8..469333c 100644
--- a/vllm/Dockerfile
+++ b/vllm/Dockerfile
@@ -73,13 +73,11 @@ FROM build-base AS build-xformers
 #ARG XFORMERS_REF=v0.0.32.post2
 #ARG XFORMERS_BUILD_VERSION=0.0.30+cu130
 #ENV BUILD_VERSION=${XFORMERS_BUILD_VERSION:-${XFORMERS_REF#v}}
-RUN git clone https://github.com/johnnynunez/xformers.git
-# https://github.com/facebookresearch/xformers.git
+RUN git clone https://github.com/facebookresearch/xformers.git
 RUN cd xformers && \
-# git checkout ${XFORMERS_REF} && \
     git submodule sync && \
     git submodule update --init --recursive -j 8 && \
-    MAX_JOBS=3 uv build --wheel --no-build-isolation -o /wheels
+    MAX_JOBS=8 uv build --wheel --no-build-isolation -o /wheels
 
 FROM build-base AS build-flashinfer
 ARG FLASHINFER_ENABLE_AOT=1
@@ -101,7 +99,7 @@ RUN git clone https://github.com/LMCache/LMCache.git && \
     # PyTorch version is dated in LMCache
     sed -i '/torch/d' pyproject.toml && \
     uv pip install setuptools_scm && \
-    MAX_JOBS=2 python -m build --wheel --no-isolation && \
+    MAX_JOBS=8 python -m build --wheel --no-isolation && \
     cp dist/*.whl /wheels/
 
 
@@ -110,8 +108,8 @@ RUN apt-get update && apt-get install -y build-essential cmake gcc && \
     git clone https://github.com/Dao-AILab/flash-attention flash-attention && \
     cd flash-attention/hopper && \
     mkdir wheels && \
-    export MAX_JOBS=4 && \
-    export NVCC_THREADS=1 && \
+    export MAX_JOBS=8 && \
+    export NVCC_THREADS=4 && \
     export CMAKE_BUILD_PARALLEL_LEVEL=$MAX_JOBS && \
     MAX_JOBS=$MAX_JOBS \
     CMAKE_BUILD_PARALLEL_LEVEL=$MAX_JOBS \
@@ -137,7 +135,7 @@ RUN cd vllm && \
     git submodule sync && \
     git submodule update --init --recursive -j 8 && \
     sed -i 's/GIT_TAG [a-f0-9]\{40\}/GIT_TAG main/' cmake/external_projects/vllm_flash_attn.cmake && \
-    export MAX_JOBS=4 && \
+    export MAX_JOBS=8 && \
     export CMAKE_BUILD_PARALLEL_LEVEL=$MAX_JOBS && \
     python use_existing_torch.py && \
     uv pip install -r requirements/build.txt && \