[CI][torch nightlies] Use main Dockerfile with flags for nightly torch tests (#30443)
Signed-off-by: Orion Reblitz-Richardson <orionr@meta.com> Signed-off-by: Orion Reblitz-Richardson <orionr@gmail.com> Co-authored-by: Kevin H. Luu <khluu000@gmail.com>
This commit is contained in:
committed by
GitHub
parent
5206e5e28c
commit
68b0a6c1ba
@@ -148,12 +148,36 @@ ARG PYTORCH_CUDA_INDEX_BASE_URL
|
||||
|
||||
WORKDIR /workspace
|
||||
|
||||
# install build and runtime dependencies
|
||||
# We can specify the standard or nightly build of PyTorch
|
||||
ARG PYTORCH_NIGHTLY
|
||||
|
||||
# Install build and runtime dependencies, including PyTorch
|
||||
# Check whether to install torch nightly instead of release for this build
|
||||
COPY requirements/common.txt requirements/common.txt
|
||||
COPY requirements/cuda.txt requirements/cuda.txt
|
||||
COPY use_existing_torch.py use_existing_torch.py
|
||||
COPY pyproject.toml pyproject.toml
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
uv pip install --python /opt/venv/bin/python3 -r requirements/cuda.txt \
|
||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
|
||||
if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
|
||||
echo "Installing torch nightly..." \
|
||||
&& uv pip install --python /opt/venv/bin/python3 torch torchaudio torchvision --pre \
|
||||
--index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \
|
||||
&& echo "Installing other requirements..." \
|
||||
&& /opt/venv/bin/python3 use_existing_torch.py --prefix \
|
||||
&& uv pip install --python /opt/venv/bin/python3 -r requirements/cuda.txt \
|
||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
|
||||
else \
|
||||
uv pip install --python /opt/venv/bin/python3 -r requirements/cuda.txt \
|
||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
|
||||
fi
|
||||
|
||||
# Track PyTorch lib versions used during build and match in downstream instances.
|
||||
# We do this for both nightly and release so we can strip requirements/*.txt as needed.
|
||||
# Otherwise library dependencies can upgrade/downgrade torch incorrectly.
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
uv pip freeze | grep -i "^torch=\|^torchvision=\|^torchaudio=" > torch_lib_versions.txt \
|
||||
&& TORCH_LIB_VERSIONS=$(cat torch_lib_versions.txt | xargs) \
|
||||
&& echo "Installed torch libs: ${TORCH_LIB_VERSIONS}"
|
||||
|
||||
# CUDA arch list used by torch
|
||||
# Explicitly set the list to avoid issues with torch 2.2
|
||||
@@ -171,8 +195,13 @@ ARG PIP_INDEX_URL UV_INDEX_URL
|
||||
ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
|
||||
ARG PYTORCH_CUDA_INDEX_BASE_URL
|
||||
|
||||
# install build dependencies
|
||||
# We can specify the standard or nightly build of PyTorch
|
||||
ARG PYTORCH_NIGHTLY
|
||||
|
||||
# Install build dependencies
|
||||
COPY requirements/build.txt requirements/build.txt
|
||||
COPY use_existing_torch.py use_existing_torch.py
|
||||
COPY --from=base /workspace/torch_lib_versions.txt torch_lib_versions.txt
|
||||
|
||||
# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
|
||||
# Reference: https://github.com/astral-sh/uv/pull/1694
|
||||
@@ -182,8 +211,18 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match"
|
||||
ENV UV_LINK_MODE=copy
|
||||
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \
|
||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
|
||||
if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
|
||||
echo "Installing build requirements without torch..." \
|
||||
&& python3 use_existing_torch.py --prefix \
|
||||
&& uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \
|
||||
&& echo "Installing torch nightly..." \
|
||||
&& uv pip install --python /opt/venv/bin/python3 $(cat torch_lib_versions.txt | grep -i "^torch=" | xargs) --pre \
|
||||
--index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
|
||||
else \
|
||||
echo "Installing build requirements..." \
|
||||
&& uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \
|
||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
|
||||
fi
|
||||
|
||||
WORKDIR /workspace
|
||||
|
||||
@@ -215,6 +254,13 @@ ARG VLLM_MAIN_CUDA_VERSION=""
|
||||
# Use dummy version for csrc-build wheel (only .so files are extracted, version doesn't matter)
|
||||
ENV SETUPTOOLS_SCM_PRETEND_VERSION="0.0.0+csrc.build"
|
||||
|
||||
# Use existing torch for nightly builds
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
|
||||
python3 use_existing_torch.py --prefix; \
|
||||
fi
|
||||
|
||||
# Build the vLLM wheel
|
||||
# if USE_SCCACHE is set, use sccache to speed up compilation
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
if [ "$USE_SCCACHE" = "1" ]; then \
|
||||
@@ -258,6 +304,7 @@ RUN --mount=type=cache,target=/root/.cache/ccache \
|
||||
export VLLM_DOCKER_BUILD_CONTEXT=1 && \
|
||||
python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38; \
|
||||
fi
|
||||
|
||||
#################### CSRC BUILD IMAGE ####################
|
||||
|
||||
#################### EXTENSIONS BUILD IMAGE ####################
|
||||
@@ -314,8 +361,13 @@ ARG PIP_INDEX_URL UV_INDEX_URL
|
||||
ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
|
||||
ARG PYTORCH_CUDA_INDEX_BASE_URL
|
||||
|
||||
# install build dependencies
|
||||
# We can specify the standard or nightly build of PyTorch
|
||||
ARG PYTORCH_NIGHTLY
|
||||
|
||||
# Install build dependencies
|
||||
COPY requirements/build.txt requirements/build.txt
|
||||
COPY use_existing_torch.py use_existing_torch.py
|
||||
COPY --from=base /workspace/torch_lib_versions.txt torch_lib_versions.txt
|
||||
|
||||
# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
|
||||
# Reference: https://github.com/astral-sh/uv/pull/1694
|
||||
@@ -325,14 +377,23 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match"
|
||||
ENV UV_LINK_MODE=copy
|
||||
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \
|
||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
|
||||
if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
|
||||
echo "Installing build requirements without torch..." \
|
||||
&& python3 use_existing_torch.py --prefix \
|
||||
&& uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \
|
||||
&& echo "Installing torch nightly..." \
|
||||
&& uv pip install --python /opt/venv/bin/python3 $(cat torch_lib_versions.txt | grep -i "^torch=" | xargs) --pre \
|
||||
--index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
|
||||
else \
|
||||
echo "Installing build requirements..." \
|
||||
&& uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \
|
||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
|
||||
fi
|
||||
|
||||
WORKDIR /workspace
|
||||
|
||||
# Copy pre-built csrc wheel directly
|
||||
COPY --from=csrc-build /workspace/dist /precompiled-wheels
|
||||
|
||||
COPY . .
|
||||
|
||||
ARG GIT_REPO_CHECK=0
|
||||
@@ -345,6 +406,13 @@ ENV VLLM_TARGET_DEVICE=${vllm_target_device}
|
||||
# Skip adding +precompiled suffix to version (preserves git-derived version)
|
||||
ENV VLLM_SKIP_PRECOMPILED_VERSION_SUFFIX=1
|
||||
|
||||
# Use existing torch for nightly builds
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
|
||||
python3 use_existing_torch.py --prefix; \
|
||||
fi
|
||||
|
||||
# Build the vLLM wheel
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
--mount=type=bind,source=.git,target=.git \
|
||||
if [ "${vllm_target_device}" = "cuda" ]; then \
|
||||
@@ -367,7 +435,8 @@ RUN if [ "$RUN_WHEEL_CHECK" = "true" ]; then \
|
||||
else \
|
||||
echo "Skipping wheel size check."; \
|
||||
fi
|
||||
#################### EXTENSIONS BUILD IMAGE ####################
|
||||
|
||||
#################### WHEEL BUILD IMAGE ####################
|
||||
|
||||
#################### DEV IMAGE ####################
|
||||
FROM base AS dev
|
||||
@@ -385,12 +454,34 @@ ENV UV_LINK_MODE=copy
|
||||
|
||||
# Install libnuma-dev, required by fastsafetensors (fixes #20384)
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends libnuma-dev && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
|
||||
# We can specify the standard or nightly build of PyTorch
|
||||
ARG PYTORCH_NIGHTLY
|
||||
|
||||
# Install development dependencies
|
||||
COPY requirements/lint.txt requirements/lint.txt
|
||||
COPY requirements/test.in requirements/test.in
|
||||
COPY requirements/test.txt requirements/test.txt
|
||||
COPY requirements/dev.txt requirements/dev.txt
|
||||
COPY use_existing_torch.py use_existing_torch.py
|
||||
COPY --from=base /workspace/torch_lib_versions.txt torch_lib_versions.txt
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
uv pip install --python /opt/venv/bin/python3 -r requirements/dev.txt \
|
||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
|
||||
if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
|
||||
echo "Installing dev requirements plus torch nightly..." \
|
||||
&& python3 use_existing_torch.py --prefix \
|
||||
&& cat torch_lib_versions.txt >> requirements/test.in \
|
||||
&& uv pip compile requirements/test.in -o requirements/test.txt --index-strategy unsafe-best-match \
|
||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \
|
||||
&& uv pip install --python /opt/venv/bin/python3 $(cat torch_lib_versions.txt | xargs) --pre \
|
||||
-r requirements/dev.txt \
|
||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
|
||||
else \
|
||||
echo "Installing dev requirements..." \
|
||||
&& uv pip install --python /opt/venv/bin/python3 -r requirements/dev.txt \
|
||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
|
||||
fi
|
||||
|
||||
#################### DEV IMAGE ####################
|
||||
#################### vLLM installation IMAGE ####################
|
||||
# image with vLLM installed
|
||||
@@ -548,11 +639,26 @@ ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
|
||||
ARG PYTORCH_CUDA_INDEX_BASE_URL
|
||||
ARG PIP_KEYRING_PROVIDER UV_KEYRING_PROVIDER
|
||||
|
||||
# Install vllm wheel first, so that torch etc will be installed.
|
||||
# We can specify the standard or nightly build of PyTorch
|
||||
ARG PYTORCH_NIGHTLY
|
||||
|
||||
# Install vLLM wheel first, so that torch etc will be installed.
|
||||
# Check whether to install torch nightly instead of release for this build.
|
||||
COPY --from=base /workspace/torch_lib_versions.txt torch_lib_versions.txt
|
||||
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
|
||||
--mount=type=cache,target=/root/.cache/uv \
|
||||
uv pip install --system dist/*.whl --verbose \
|
||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
|
||||
if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
|
||||
echo "Installing torch nightly..." \
|
||||
&& uv pip install --system $(cat torch_lib_versions.txt | xargs) --pre \
|
||||
--index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \
|
||||
&& echo "Installing vLLM..." \
|
||||
&& uv pip install --system dist/*.whl --verbose \
|
||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
|
||||
else \
|
||||
echo "Installing vLLM..." \
|
||||
&& uv pip install --system dist/*.whl --verbose \
|
||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
|
||||
fi
|
||||
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
. /etc/environment && \
|
||||
@@ -612,12 +718,33 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
|
||||
&& apt-get update -y \
|
||||
&& apt-get install -y git
|
||||
|
||||
# install development dependencies (for testing)
|
||||
# We can specify the standard or nightly build of PyTorch
|
||||
ARG PYTORCH_NIGHTLY
|
||||
|
||||
# Install development dependencies (for testing)
|
||||
COPY requirements/lint.txt requirements/lint.txt
|
||||
COPY requirements/test.in requirements/test.in
|
||||
COPY requirements/test.txt requirements/test.txt
|
||||
COPY requirements/dev.txt requirements/dev.txt
|
||||
COPY use_existing_torch.py use_existing_torch.py
|
||||
COPY --from=base /workspace/torch_lib_versions.txt torch_lib_versions.txt
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
CUDA_MAJOR="${CUDA_VERSION%%.*}"; \
|
||||
if [ "$CUDA_MAJOR" -ge 12 ]; then \
|
||||
uv pip install --system -r requirements/dev.txt \
|
||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
|
||||
if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
|
||||
echo "Installing dev requirements plus torch nightly..." \
|
||||
&& python3 use_existing_torch.py --prefix \
|
||||
&& cat torch_lib_versions.txt >> requirements/test.in \
|
||||
&& uv pip compile requirements/test.in -o requirements/test.txt --index-strategy unsafe-best-match \
|
||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \
|
||||
&& uv pip install --system $(cat torch_lib_versions.txt | xargs) --pre \
|
||||
-r requirements/dev.txt \
|
||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
|
||||
else \
|
||||
echo "Installing dev requirements..." \
|
||||
&& uv pip install --system -r requirements/dev.txt \
|
||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
|
||||
fi \
|
||||
fi
|
||||
|
||||
# install development dependencies (for testing)
|
||||
|
||||
@@ -1,3 +1,11 @@
|
||||
#######
|
||||
#
|
||||
# THIS FILE IS DEPRECATED AND WILL BE REMOVED SHORTLY
|
||||
#
|
||||
# Please use the standard Dockerfile with PYTORCH_NIGHTLY=1 instead
|
||||
#
|
||||
#######
|
||||
|
||||
# The vLLM Dockerfile is used to construct vLLM image against torch nightly that can be directly used for testing
|
||||
|
||||
# for torch nightly, cuda >=12.6 is required,
|
||||
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 205 KiB After Width: | Height: | Size: 325 KiB |
@@ -1,18 +1,54 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import argparse
|
||||
import glob
|
||||
import sys
|
||||
|
||||
for file in (*glob.glob("requirements/*.txt"), "pyproject.toml"):
|
||||
print(f">>> cleaning {file}")
|
||||
with open(file) as f:
|
||||
lines = f.readlines()
|
||||
if "torch" in "".join(lines).lower():
|
||||
print("removed:")
|
||||
with open(file, "w") as f:
|
||||
for line in lines:
|
||||
if "torch" not in line.lower():
|
||||
f.write(line)
|
||||
else:
|
||||
print(line.strip())
|
||||
print(f"<<< done cleaning {file}\n")
|
||||
# Line prefixes that identify a pinned torch library requirement.
# Only lines starting with one of these are stripped when --prefix is given.
TORCH_LIB_PREFIXES = (
    # requirements/*.txt and requirements/*.in entries, e.g. torch==<version>
    "torch=",
    "torchvision=",
    "torchaudio=",
    # pyproject.toml entries, e.g. "torch == <version>",
    '"torch =',
    '"torchvision =',
    '"torchaudio =',
)


def main(argv):
    """Strip torch requirements so an already-installed torch is kept.

    Rewrites, in place and relative to the current working directory, every
    ``requirements/*.txt`` and ``requirements/*.in`` file plus
    ``pyproject.toml``. A file is only rewritten when it mentions "torch"
    (case-insensitive) somewhere; each dropped line is printed to stdout.

    Args:
        argv: Command-line arguments without the program name. The only
            recognised flag is ``--prefix``:
            * with ``--prefix``, a line is dropped only when its stripped,
              lower-cased text starts with one of ``TORCH_LIB_PREFIXES``;
            * without it, every line containing "torch" (case-insensitive)
              is dropped.

    Raises:
        OSError: if a listed file cannot be opened. ``pyproject.toml`` is
            opened unconditionally, so it must exist in the working directory.
    """
    parser = argparse.ArgumentParser(
        description="Strip torch lib requirements to use installed version."
    )
    parser.add_argument(
        "--prefix",
        action="store_true",
        help="Strip prefix matches only (default: False)",
    )
    args = parser.parse_args(argv)

    targets = (
        *glob.glob("requirements/*.txt"),
        *glob.glob("requirements/*.in"),
        "pyproject.toml",
    )
    for file in targets:
        # pyproject.toml is UTF-8 by specification; read requirements files the
        # same way so the result does not depend on the build host's locale.
        with open(file, encoding="utf-8") as f:
            lines = f.readlines()

        # Leave files that never mention torch untouched.
        if "torch" not in "".join(lines).lower():
            continue

        with open(file, "w", encoding="utf-8") as f:
            for line in lines:
                lowered = line.lower()
                if args.prefix:
                    # Targeted mode: only exact torch/torchvision/torchaudio pins.
                    drop = lowered.strip().startswith(TORCH_LIB_PREFIXES)
                else:
                    # Broad mode: anything that mentions torch at all.
                    drop = "torch" in lowered

                if drop:
                    print(f">>> removed from {file}:", line.strip())
                else:
                    f.write(line)


if __name__ == "__main__":
    main(sys.argv[1:])
|
||||
|
||||
Reference in New Issue
Block a user