[CI][torch nightlies] Use main Dockerfile with flags for nightly torch tests (#30443)

Signed-off-by: Orion Reblitz-Richardson <orionr@meta.com>
Signed-off-by: Orion Reblitz-Richardson <orionr@gmail.com>
Co-authored-by: Kevin H. Luu <khluu000@gmail.com>
This commit is contained in:
Orion Reblitz-Richardson
2026-01-23 08:22:56 -10:00
committed by GitHub
parent 5206e5e28c
commit 68b0a6c1ba
4 changed files with 203 additions and 32 deletions

View File

@@ -148,12 +148,36 @@ ARG PYTORCH_CUDA_INDEX_BASE_URL
WORKDIR /workspace
# install build and runtime dependencies
# We can specify the standard or nightly build of PyTorch
ARG PYTORCH_NIGHTLY
# Install build and runtime dependencies, including PyTorch
# Check whether to install torch nightly instead of release for this build
COPY requirements/common.txt requirements/common.txt
COPY requirements/cuda.txt requirements/cuda.txt
COPY use_existing_torch.py use_existing_torch.py
COPY pyproject.toml pyproject.toml
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --python /opt/venv/bin/python3 -r requirements/cuda.txt \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
echo "Installing torch nightly..." \
&& uv pip install --python /opt/venv/bin/python3 torch torchaudio torchvision --pre \
--index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \
&& echo "Installing other requirements..." \
&& /opt/venv/bin/python3 use_existing_torch.py --prefix \
&& uv pip install --python /opt/venv/bin/python3 -r requirements/cuda.txt \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
else \
uv pip install --python /opt/venv/bin/python3 -r requirements/cuda.txt \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
fi
# Track PyTorch lib versions used during build and match in downstream instances.
# We do this for both nightly and release so we can strip dependencies/*.txt as needed.
# Otherwise library dependencies can upgrade/downgrade torch incorrectly.
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip freeze | grep -i "^torch=\|^torchvision=\|^torchaudio=" > torch_lib_versions.txt \
&& TORCH_LIB_VERSIONS=$(cat torch_lib_versions.txt | xargs) \
&& echo "Installed torch libs: ${TORCH_LIB_VERSIONS}"
# CUDA arch list used by torch
# Explicitly set the list to avoid issues with torch 2.2
@@ -171,8 +195,13 @@ ARG PIP_INDEX_URL UV_INDEX_URL
ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
ARG PYTORCH_CUDA_INDEX_BASE_URL
# install build dependencies
# We can specify the standard or nightly build of PyTorch
ARG PYTORCH_NIGHTLY
# Install build dependencies
COPY requirements/build.txt requirements/build.txt
COPY use_existing_torch.py use_existing_torch.py
COPY --from=base /workspace/torch_lib_versions.txt torch_lib_versions.txt
# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
# Reference: https://github.com/astral-sh/uv/pull/1694
@@ -182,8 +211,18 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match"
ENV UV_LINK_MODE=copy
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
echo "Installing build requirements without torch..." \
&& python3 use_existing_torch.py --prefix \
&& uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \
&& echo "Installing torch nightly..." \
&& uv pip install --python /opt/venv/bin/python3 $(cat torch_lib_versions.txt | grep -i "^torch=" | xargs) --pre \
--index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
else \
echo "Installing build requirements..." \
&& uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
fi
WORKDIR /workspace
@@ -215,6 +254,13 @@ ARG VLLM_MAIN_CUDA_VERSION=""
# Use dummy version for csrc-build wheel (only .so files are extracted, version doesn't matter)
ENV SETUPTOOLS_SCM_PRETEND_VERSION="0.0.0+csrc.build"
# Use existing torch for nightly builds
RUN --mount=type=cache,target=/root/.cache/uv \
if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
python3 use_existing_torch.py --prefix; \
fi
# Build the vLLM wheel
# if USE_SCCACHE is set, use sccache to speed up compilation
RUN --mount=type=cache,target=/root/.cache/uv \
if [ "$USE_SCCACHE" = "1" ]; then \
@@ -258,6 +304,7 @@ RUN --mount=type=cache,target=/root/.cache/ccache \
export VLLM_DOCKER_BUILD_CONTEXT=1 && \
python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38; \
fi
#################### CSRC BUILD IMAGE ####################
#################### EXTENSIONS BUILD IMAGE ####################
@@ -314,8 +361,13 @@ ARG PIP_INDEX_URL UV_INDEX_URL
ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
ARG PYTORCH_CUDA_INDEX_BASE_URL
# install build dependencies
# We can specify the standard or nightly build of PyTorch
ARG PYTORCH_NIGHTLY
# Install build dependencies
COPY requirements/build.txt requirements/build.txt
COPY use_existing_torch.py use_existing_torch.py
COPY --from=base /workspace/torch_lib_versions.txt torch_lib_versions.txt
# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
# Reference: https://github.com/astral-sh/uv/pull/1694
@@ -325,14 +377,23 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match"
ENV UV_LINK_MODE=copy
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
echo "Installing build requirements without torch..." \
&& python3 use_existing_torch.py --prefix \
&& uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \
&& echo "Installing torch nightly..." \
&& uv pip install --python /opt/venv/bin/python3 $(cat torch_lib_versions.txt | grep -i "^torch=" | xargs) --pre \
--index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
else \
echo "Installing build requirements..." \
&& uv pip install --python /opt/venv/bin/python3 -r requirements/build.txt \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
fi
WORKDIR /workspace
# Copy pre-built csrc wheel directly
COPY --from=csrc-build /workspace/dist /precompiled-wheels
COPY . .
ARG GIT_REPO_CHECK=0
@@ -345,6 +406,13 @@ ENV VLLM_TARGET_DEVICE=${vllm_target_device}
# Skip adding +precompiled suffix to version (preserves git-derived version)
ENV VLLM_SKIP_PRECOMPILED_VERSION_SUFFIX=1
# Use existing torch for nightly builds
RUN --mount=type=cache,target=/root/.cache/uv \
if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
python3 use_existing_torch.py --prefix; \
fi
# Build the vLLM wheel
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,source=.git,target=.git \
if [ "${vllm_target_device}" = "cuda" ]; then \
@@ -367,7 +435,8 @@ RUN if [ "$RUN_WHEEL_CHECK" = "true" ]; then \
else \
echo "Skipping wheel size check."; \
fi
#################### EXTENSION Build IMAGE ####################
#################### WHEEL BUILD IMAGE ####################
#################### DEV IMAGE ####################
FROM base AS dev
@@ -385,12 +454,34 @@ ENV UV_LINK_MODE=copy
# Install libnuma-dev, required by fastsafetensors (fixes #20384)
RUN apt-get update && apt-get install -y --no-install-recommends libnuma-dev && rm -rf /var/lib/apt/lists/*
# We can specify the standard or nightly build of PyTorch
ARG PYTORCH_NIGHTLY
# Install development dependencies
COPY requirements/lint.txt requirements/lint.txt
COPY requirements/test.in requirements/test.in
COPY requirements/test.txt requirements/test.txt
COPY requirements/dev.txt requirements/dev.txt
COPY use_existing_torch.py use_existing_torch.py
COPY --from=base /workspace/torch_lib_versions.txt torch_lib_versions.txt
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --python /opt/venv/bin/python3 -r requirements/dev.txt \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
echo "Installing dev requirements plus torch nightly..." \
&& python3 use_existing_torch.py --prefix \
&& cat torch_lib_versions.txt >> requirements/test.in \
&& uv pip compile requirements/test.in -o requirements/test.txt --index-strategy unsafe-best-match \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \
&& uv pip install --python /opt/venv/bin/python3 $(cat torch_lib_versions.txt | xargs) --pre \
-r requirements/dev.txt \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
else \
echo "Installing dev requirements..." \
&& uv pip install --python /opt/venv/bin/python3 -r requirements/dev.txt \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
fi
#################### DEV IMAGE ####################
#################### vLLM installation IMAGE ####################
# image with vLLM installed
@@ -548,11 +639,26 @@ ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
ARG PYTORCH_CUDA_INDEX_BASE_URL
ARG PIP_KEYRING_PROVIDER UV_KEYRING_PROVIDER
# Install vllm wheel first, so that torch etc will be installed.
# We can specify the standard or nightly build of PyTorch
ARG PYTORCH_NIGHTLY
# Install vLLM wheel first, so that torch etc will be installed.
# Check whether to install torch nightly instead of release for this build.
COPY --from=base /workspace/torch_lib_versions.txt torch_lib_versions.txt
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
--mount=type=cache,target=/root/.cache/uv \
uv pip install --system dist/*.whl --verbose \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
echo "Installing torch nightly..." \
&& uv pip install --system $(cat torch_lib_versions.txt | xargs) --pre \
--index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \
&& echo "Installing vLLM..." \
&& uv pip install --system dist/*.whl --verbose \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
else \
echo "Installing vLLM..." \
&& uv pip install --system dist/*.whl --verbose \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
fi
RUN --mount=type=cache,target=/root/.cache/uv \
. /etc/environment && \
@@ -612,12 +718,33 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
&& apt-get update -y \
&& apt-get install -y git
# install development dependencies (for testing)
# We can specify the standard or nightly build of PyTorch
ARG PYTORCH_NIGHTLY
# Install development dependencies (for testing)
COPY requirements/lint.txt requirements/lint.txt
COPY requirements/test.in requirements/test.in
COPY requirements/test.txt requirements/test.txt
COPY requirements/dev.txt requirements/dev.txt
COPY use_existing_torch.py use_existing_torch.py
COPY --from=base /workspace/torch_lib_versions.txt torch_lib_versions.txt
RUN --mount=type=cache,target=/root/.cache/uv \
CUDA_MAJOR="${CUDA_VERSION%%.*}"; \
if [ "$CUDA_MAJOR" -ge 12 ]; then \
uv pip install --system -r requirements/dev.txt \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
if [ "${PYTORCH_NIGHTLY}" = "1" ]; then \
echo "Installing dev requirements plus torch nightly..." \
&& python3 use_existing_torch.py --prefix \
&& cat torch_lib_versions.txt >> requirements/test.in \
&& uv pip compile requirements/test.in -o requirements/test.txt --index-strategy unsafe-best-match \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \
&& uv pip install --system $(cat torch_lib_versions.txt | xargs) --pre \
-r requirements/dev.txt \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/nightly/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
else \
echo "Installing dev requirements..." \
&& uv pip install --system -r requirements/dev.txt \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
fi \
fi
# install development dependencies (for testing)

View File

@@ -1,3 +1,11 @@
#######
#
# THIS FILE IS DEPRECATED AND WILL BE REMOVED SHORTLY
#
# Please use the standard Dockerfile with PYTORCH_NIGHTLY=1 instead
#
#######
# The vLLM Dockerfile is used to construct vLLM image against torch nightly that can be directly used for testing
# for torch nightly, cuda >=12.6 is required,

Binary file not shown.

Before

Width:  |  Height:  |  Size: 205 KiB

After

Width:  |  Height:  |  Size: 325 KiB

View File

@@ -1,18 +1,54 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import argparse
import glob
import sys
for file in (*glob.glob("requirements/*.txt"), "pyproject.toml"):
print(f">>> cleaning {file}")
with open(file) as f:
lines = f.readlines()
if "torch" in "".join(lines).lower():
print("removed:")
with open(file, "w") as f:
for line in lines:
if "torch" not in line.lower():
f.write(line)
else:
print(line.strip())
print(f"<<< done cleaning {file}\n")
# Only strip targeted libraries when checking prefix
TORCH_LIB_PREFIXES = (
# requirements/*.txt/in
"torch=",
"torchvision=",
"torchaudio=",
# pyproject.toml
'"torch =',
'"torchvision =',
'"torchaudio =',
)
def main(argv):
parser = argparse.ArgumentParser(
description="Strip torch lib requirements to use installed version."
)
parser.add_argument(
"--prefix",
action="store_true",
help="Strip prefix matches only (default: False)",
)
args = parser.parse_args(argv)
for file in (
*glob.glob("requirements/*.txt"),
*glob.glob("requirements/*.in"),
"pyproject.toml",
):
with open(file) as f:
lines = f.readlines()
if "torch" in "".join(lines).lower():
with open(file, "w") as f:
for line in lines:
if (
args.prefix
and not line.lower().strip().startswith(TORCH_LIB_PREFIXES)
or not args.prefix
and "torch" not in line.lower()
):
f.write(line)
else:
print(f">>> removed from {file}:", line.strip())
if __name__ == "__main__":
main(sys.argv[1:])