diff --git a/docker/Dockerfile b/docker/Dockerfile index fd447e9be..5f9649144 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -148,12 +148,36 @@ ARG PYTORCH_CUDA_INDEX_BASE_URL WORKDIR /workspace -# install build and runtime dependencies +# We can specify the standard or nightly build of PyTorch +ARG PYTORCH_NIGHTLY + +# Install build and runtime dependencies, including PyTorch +# Check whether to install torch nightly instead of release for this build COPY requirements/common.txt requirements/common.txt COPY requirements/cuda.txt requirements/cuda.txt +COPY use_existing_torch.py use_existing_torch.py +COPY pyproject.toml pyproject.toml RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install --python /opt/venv/bin/python3 -r requirements/cuda.txt \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') + if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \ + echo "Installing torch nightly..." \ + && uv pip install --python /opt/venv/bin/python3 torch torchaudio torchvision --pre \ + --index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \ + && echo "Installing other requirements..." \ + && /opt/venv/bin/python3 use_existing_torch.py --prefix \ + && uv pip install --python /opt/venv/bin/python3 -r requirements/cuda.txt \ + --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \ + else \ + uv pip install --python /opt/venv/bin/python3 -r requirements/cuda.txt \ + --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \ + fi + +# Track PyTorch lib versions used during build and match in downstream instances. +# We do this for both nightly and release so we can strip dependencies/*.txt as needed. +# Otherwise library dependencies can upgrade/downgrade torch incorrectly. 
+RUN --mount=type=cache,target=/root/.cache/uv \ + uv pip freeze | grep -i "^torch=\|^torchvision=\|^torchaudio=" > torch_lib_versions.txt \ + && TORCH_LIB_VERSIONS=$(cat torch_lib_versions.txt | xargs) \ + && echo "Installed torch libs: ${TORCH_LIB_VERSIONS}" # CUDA arch list used by torch # Explicitly set the list to avoid issues with torch 2.2 @@ -171,8 +195,13 @@ ARG PIP_INDEX_URL UV_INDEX_URL ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL ARG PYTORCH_CUDA_INDEX_BASE_URL -# install build dependencies +# We can specify the standard or nightly build of PyTorch +ARG PYTORCH_NIGHTLY + +# Install build dependencies COPY requirements/build.txt requirements/build.txt +COPY use_existing_torch.py use_existing_torch.py +COPY --from=base /workspace/torch_lib_versions.txt torch_lib_versions.txt # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out # Reference: https://github.com/astral-sh/uv/pull/1694 @@ -182,8 +211,18 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match" ENV UV_LINK_MODE=copy RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') + if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \ + echo "Installing build requirements without torch..." \ + && python3 use_existing_torch.py --prefix \ + && uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \ + && echo "Installing torch nightly..." \ + && uv pip install --python /opt/venv/bin/python3 $(cat torch_lib_versions.txt | grep -i "^torch=" | xargs) --pre \ + --index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \ + else \ + echo "Installing build requirements..." \ + && uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \ + --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. 
-f1,2 | tr -d '.'); \ + fi WORKDIR /workspace @@ -215,6 +254,13 @@ ARG VLLM_MAIN_CUDA_VERSION="" # Use dummy version for csrc-build wheel (only .so files are extracted, version doesn't matter) ENV SETUPTOOLS_SCM_PRETEND_VERSION="0.0.0+csrc.build" +# Use existing torch for nightly builds +RUN --mount=type=cache,target=/root/.cache/uv \ + if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \ + python3 use_existing_torch.py --prefix; \ + fi + +# Build the vLLM wheel # if USE_SCCACHE is set, use sccache to speed up compilation RUN --mount=type=cache,target=/root/.cache/uv \ if [ "$USE_SCCACHE" = "1" ]; then \ @@ -258,6 +304,7 @@ RUN --mount=type=cache,target=/root/.cache/ccache \ export VLLM_DOCKER_BUILD_CONTEXT=1 && \ python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38; \ fi + #################### CSRC BUILD IMAGE #################### #################### EXTENSIONS BUILD IMAGE #################### @@ -314,8 +361,13 @@ ARG PIP_INDEX_URL UV_INDEX_URL ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL ARG PYTORCH_CUDA_INDEX_BASE_URL -# install build dependencies +# We can specify the standard or nightly build of PyTorch +ARG PYTORCH_NIGHTLY + +# Install build dependencies COPY requirements/build.txt requirements/build.txt +COPY use_existing_torch.py use_existing_torch.py +COPY --from=base /workspace/torch_lib_versions.txt torch_lib_versions.txt # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out # Reference: https://github.com/astral-sh/uv/pull/1694 @@ -325,14 +377,23 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match" ENV UV_LINK_MODE=copy RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') + if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \ + echo "Installing build requirements without torch..." 
\ + && python3 use_existing_torch.py --prefix \ + && uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \ + && echo "Installing torch nightly..." \ + && uv pip install --python /opt/venv/bin/python3 $(cat torch_lib_versions.txt | grep -i "^torch=" | xargs) --pre \ + --index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \ + else \ + echo "Installing build requirements..." \ + && uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \ + --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \ + fi WORKDIR /workspace # Copy pre-built csrc wheel directly COPY --from=csrc-build /workspace/dist /precompiled-wheels - COPY . . ARG GIT_REPO_CHECK=0 @@ -345,6 +406,13 @@ ENV VLLM_TARGET_DEVICE=${vllm_target_device} # Skip adding +precompiled suffix to version (preserves git-derived version) ENV VLLM_SKIP_PRECOMPILED_VERSION_SUFFIX=1 +# Use existing torch for nightly builds +RUN --mount=type=cache,target=/root/.cache/uv \ + if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \ + python3 use_existing_torch.py --prefix; \ + fi + +# Build the vLLM wheel RUN --mount=type=cache,target=/root/.cache/uv \ --mount=type=bind,source=.git,target=.git \ if [ "${vllm_target_device}" = "cuda" ]; then \ @@ -367,7 +435,8 @@ RUN if [ "$RUN_WHEEL_CHECK" = "true" ]; then \ else \ echo "Skipping wheel size check."; \ fi -#################### EXTENSION Build IMAGE #################### + +#################### WHEEL BUILD IMAGE #################### #################### DEV IMAGE #################### FROM base AS dev @@ -385,12 +454,34 @@ ENV UV_LINK_MODE=copy # Install libnuma-dev, required by fastsafetensors (fixes #20384) RUN apt-get update && apt-get install -y --no-install-recommends libnuma-dev && rm -rf /var/lib/apt/lists/* + + +# We can specify the standard or nightly build of PyTorch +ARG PYTORCH_NIGHTLY + +# Install development dependencies COPY requirements/lint.txt 
requirements/lint.txt +COPY requirements/test.in requirements/test.in COPY requirements/test.txt requirements/test.txt COPY requirements/dev.txt requirements/dev.txt +COPY use_existing_torch.py use_existing_torch.py +COPY --from=base /workspace/torch_lib_versions.txt torch_lib_versions.txt RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install --python /opt/venv/bin/python3 -r requirements/dev.txt \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') + if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \ + echo "Installing dev requirements plus torch nightly..." \ + && python3 use_existing_torch.py --prefix \ + && cat torch_lib_versions.txt >> requirements/test.in \ + && uv pip compile requirements/test.in -o requirements/test.txt --index-strategy unsafe-best-match \ + --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \ + && uv pip install --python /opt/venv/bin/python3 $(cat torch_lib_versions.txt | xargs) --pre \ + -r requirements/dev.txt \ + --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \ + else \ + echo "Installing dev requirements..." \ + && uv pip install --python /opt/venv/bin/python3 -r requirements/dev.txt \ + --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \ + fi + #################### DEV IMAGE #################### #################### vLLM installation IMAGE #################### # image with vLLM installed @@ -548,11 +639,26 @@ ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL ARG PYTORCH_CUDA_INDEX_BASE_URL ARG PIP_KEYRING_PROVIDER UV_KEYRING_PROVIDER -# Install vllm wheel first, so that torch etc will be installed. +# We can specify the standard or nightly build of PyTorch +ARG PYTORCH_NIGHTLY + +# Install vLLM wheel first, so that torch etc will be installed. +# Check whether to install torch nightly instead of release for this build. 
+COPY --from=base /workspace/torch_lib_versions.txt torch_lib_versions.txt RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \ --mount=type=cache,target=/root/.cache/uv \ - uv pip install --system dist/*.whl --verbose \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') + if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \ + echo "Installing torch nightly..." \ + && uv pip install --system $(cat torch_lib_versions.txt | xargs) --pre \ + --index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \ + && echo "Installing vLLM..." \ + && uv pip install --system dist/*.whl --verbose \ + --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \ + else \ + echo "Installing vLLM..." \ + && uv pip install --system dist/*.whl --verbose \ + --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \ + fi RUN --mount=type=cache,target=/root/.cache/uv \ . /etc/environment && \ @@ -612,12 +718,33 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \ && apt-get update -y \ && apt-get install -y git -# install development dependencies (for testing) +# We can specify the standard or nightly build of PyTorch +ARG PYTORCH_NIGHTLY + +# Install development dependencies (for testing) +COPY requirements/lint.txt requirements/lint.txt +COPY requirements/test.in requirements/test.in +COPY requirements/test.txt requirements/test.txt +COPY requirements/dev.txt requirements/dev.txt +COPY use_existing_torch.py use_existing_torch.py +COPY --from=base /workspace/torch_lib_versions.txt torch_lib_versions.txt RUN --mount=type=cache,target=/root/.cache/uv \ CUDA_MAJOR="${CUDA_VERSION%%.*}"; \ if [ "$CUDA_MAJOR" -ge 12 ]; then \ - uv pip install --system -r requirements/dev.txt \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. 
-f1,2 | tr -d '.'); \ + if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \ + echo "Installing dev requirements plus torch nightly..." \ + && python3 use_existing_torch.py --prefix \ + && cat torch_lib_versions.txt >> requirements/test.in \ + && uv pip compile requirements/test.in -o requirements/test.txt --index-strategy unsafe-best-match \ + --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \ + && uv pip install --system $(cat torch_lib_versions.txt | xargs) --pre \ + -r requirements/dev.txt \ + --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \ + else \ + echo "Installing dev requirements..." \ + && uv pip install --system -r requirements/dev.txt \ + --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \ + fi \ fi # install development dependencies (for testing) diff --git a/docker/Dockerfile.nightly_torch b/docker/Dockerfile.nightly_torch index b07ef8c1c..7731c0477 100644 --- a/docker/Dockerfile.nightly_torch +++ b/docker/Dockerfile.nightly_torch @@ -1,3 +1,11 @@ +####### +# +# THIS FILE IS DEPRECATED AND WILL BE REMOVED SHORTLY +# +# Please use the standard Dockerfile with PYTORCH_NIGHTLY=1 instead +# +####### + # The vLLM Dockerfile is used to construct vLLM image against torch nightly that can be directly used for testing # for torch nightly, cuda >=12.6 is required, diff --git a/docs/assets/contributing/dockerfile-stages-dependency.png b/docs/assets/contributing/dockerfile-stages-dependency.png index c8839eb93..9ac394d4c 100644 Binary files a/docs/assets/contributing/dockerfile-stages-dependency.png and b/docs/assets/contributing/dockerfile-stages-dependency.png differ diff --git a/use_existing_torch.py b/use_existing_torch.py index e2d3f2ec8..7c58a34d6 100644 --- a/use_existing_torch.py +++ b/use_existing_torch.py @@ -1,18 +1,54 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: 
# Copyright contributors to the vLLM project
"""Strip torch library requirements so an already-installed torch is kept.

Run from the repository root. The script edits ``requirements/*.txt``,
``requirements/*.in`` and ``pyproject.toml`` in place:

* without flags, every line that mentions "torch" anywhere is removed;
* with ``--prefix``, only lines that pin ``torch``, ``torchvision`` or
  ``torchaudio`` themselves are removed.
"""
import argparse
import glob
import sys

# Only strip targeted libraries when checking prefix
TORCH_LIB_PREFIXES = (
    # requirements/*.txt/in
    "torch=",
    "torchvision=",
    "torchaudio=",
    # pyproject.toml
    '"torch =',
    '"torchvision =',
    '"torchaudio =',
)

# Bare requirement names (including the opening quote used in pyproject.toml),
# derived from TORCH_LIB_PREFIXES so that tuple stays the single source of
# truth for which libraries are targeted.
_TORCH_LIB_NAMES = frozenset(prefix.rstrip("= ") for prefix in TORCH_LIB_PREFIXES)


def _is_torch_lib_pin(line: str) -> bool:
    """Return True if *line* pins one of the targeted torch libraries.

    The text before the first "=" must be exactly a targeted name once
    surrounding whitespace is dropped, so "torch==2.9.0", "torch == 2.9.0",
    '"torch == 2.9.0",' and '"torch==2.9.0",' all match regardless of spacing,
    while "torchmetrics>=1.0" or "# via torch" do not.
    """
    name, equals, _ = line.strip().lower().partition("=")
    return bool(equals) and name.rstrip() in _TORCH_LIB_NAMES


def _should_strip(line: str, prefix_only: bool) -> bool:
    """Decide whether *line* is removed under the selected matching mode."""
    if prefix_only:
        return _is_torch_lib_pin(line)
    # Legacy behaviour: drop anything that mentions torch at all.
    return "torch" in line.lower()


def main(argv) -> None:
    """Strip torch requirements from the requirement files in the CWD.

    Args:
        argv: Command-line arguments without the program name, e.g.
            ``["--prefix"]`` or ``[]``.

    Raises:
        FileNotFoundError: If ``pyproject.toml`` is missing from the current
            directory (this also signals a run from the wrong directory).
    """
    parser = argparse.ArgumentParser(
        description="Strip torch lib requirements to use installed version."
    )
    parser.add_argument(
        "--prefix",
        action="store_true",
        help="Strip prefix matches only (default: False)",
    )
    args = parser.parse_args(argv)

    for file in (
        *glob.glob("requirements/*.txt"),
        *glob.glob("requirements/*.in"),
        "pyproject.toml",
    ):
        with open(file, encoding="utf-8") as f:
            lines = f.readlines()

        kept = []
        removed = []
        for line in lines:
            if _should_strip(line, args.prefix):
                removed.append(line)
            else:
                kept.append(line)

        # Leave files untouched when there is nothing to strip instead of
        # truncating and rewriting identical content.
        if not removed:
            continue

        with open(file, "w", encoding="utf-8") as f:
            f.writelines(kept)
        for line in removed:
            print(f">>> removed from {file}:", line.strip())


if __name__ == "__main__":
    main(sys.argv[1:])