[CI/Build][Docker] Add centralized version manifest for Docker builds (#31492)

Signed-off-by: Mritunjay Sharma <mritunjay.sharma@chainguard.dev>
This commit is contained in:
Mritunjay Kumar Sharma
2026-01-17 19:15:30 +05:30
committed by GitHub
parent 2b99f210f5
commit 9e078d0582
4 changed files with 271 additions and 6 deletions

View File

@@ -147,6 +147,13 @@ repos:
entry: python tools/pre_commit/validate_config.py
language: python
additional_dependencies: [regex]
- id: validate-docker-versions
name: Validate docker/versions.json matches Dockerfile
entry: python tools/generate_versions_json.py --check
language: python
files: ^docker/(Dockerfile|versions\.json)$
pass_filenames: false
additional_dependencies: [dockerfile-parse]
# Keep `suggestion` last
- id: suggestion
name: Suggestion

View File

@@ -5,6 +5,23 @@
# docs/contributing/dockerfile/dockerfile.md and
# docs/assets/contributing/dockerfile-stages-dependency.png
# =============================================================================
# VERSION MANAGEMENT
# =============================================================================
# ARG defaults in this Dockerfile are the source of truth for pinned versions.
# docker/versions.json is auto-generated for use with docker buildx bake.
#
# When updating versions:
# 1. Edit the ARG defaults below
# 2. Run: python tools/generate_versions_json.py
#
# To query versions programmatically:
# jq -r '.variable.CUDA_VERSION.default' docker/versions.json
#
# To build with bake:
# docker buildx bake -f docker/docker-bake.hcl -f docker/versions.json
# =============================================================================
ARG CUDA_VERSION=12.9.1
ARG PYTHON_VERSION=3.12
@@ -141,6 +158,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
# CUDA arch list used by torch
# Explicitly set the list to avoid issues with torch 2.2
# See https://github.com/pytorch/pytorch/pull/123243
# Mirrored in versions.json: .variable.TORCH_CUDA_ARCH_LIST.default
ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0 12.0'
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
#################### BUILD BASE IMAGE ####################
@@ -256,7 +274,8 @@ ENV UV_LINK_MODE=copy
WORKDIR /workspace
# Build DeepGEMM wheel
ARG DEEPGEMM_GIT_REF
# Default moved here from tools/install_deepgemm.sh for centralized version management
ARG DEEPGEMM_GIT_REF=594953acce41793ae00a1233eb516044d604bcb6
COPY tools/install_deepgemm.sh /tmp/install_deepgemm.sh
RUN --mount=type=cache,target=/root/.cache/uv \
mkdir -p /tmp/deepgemm/dist && \
@@ -271,8 +290,9 @@ RUN mkdir -p /tmp/deepgemm/dist && touch /tmp/deepgemm/dist/.deepgemm_skipped
# Build pplx-kernels and DeepEP wheels
COPY tools/ep_kernels/install_python_libraries.sh /tmp/install_python_libraries.sh
ARG PPLX_COMMIT_HASH
ARG DEEPEP_COMMIT_HASH
# Defaults moved here from tools/ep_kernels/install_python_libraries.sh for centralized version management
ARG PPLX_COMMIT_HASH=12cecfd
ARG DEEPEP_COMMIT_HASH=73b6ea4
ARG NVSHMEM_VER
RUN --mount=type=cache,target=/root/.cache/uv \
mkdir -p /tmp/ep_kernels_workspace/dist && \
@@ -474,6 +494,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
# Install FlashInfer pre-compiled kernel cache and binaries
# This is ~1.1GB and only changes when FlashInfer version bumps
# https://docs.flashinfer.ai/installation.html
# Mirrored in versions.json: .variable.FLASHINFER_VERSION.default
ARG FLASHINFER_VERSION=0.5.3
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --system flashinfer-cubin==${FLASHINFER_VERSION} \
@@ -503,14 +524,20 @@ RUN set -eux; \
# Install vllm-openai dependencies (saves ~2.6s per build)
# These are stable packages that don't depend on vLLM itself
# Mirrored in versions.json: .variable.BITSANDBYTES_VERSION_X86.default, .variable.BITSANDBYTES_VERSION_ARM64.default
# Mirrored in versions.json: .variable.TIMM_VERSION.default, .variable.RUNAI_MODEL_STREAMER_VERSION.default
ARG BITSANDBYTES_VERSION_X86=0.46.1
ARG BITSANDBYTES_VERSION_ARM64=0.42.0
ARG TIMM_VERSION=">=1.0.17"
ARG RUNAI_MODEL_STREAMER_VERSION=">=0.15.3"
RUN --mount=type=cache,target=/root/.cache/uv \
if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
BITSANDBYTES_VERSION="0.42.0"; \
BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_ARM64}"; \
else \
BITSANDBYTES_VERSION="0.46.1"; \
BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_X86}"; \
fi; \
uv pip install --system accelerate hf_transfer modelscope \
"bitsandbytes>=${BITSANDBYTES_VERSION}" 'timm>=1.0.17' 'runai-model-streamer[s3,gcs]>=0.15.3'
"bitsandbytes>=${BITSANDBYTES_VERSION}" "timm${TIMM_VERSION}" "runai-model-streamer[s3,gcs]${RUNAI_MODEL_STREAMER_VERSION}"
# ============================================================
# VLLM INSTALLATION (depends on build stage)

92
docker/versions.json Normal file
View File

@@ -0,0 +1,92 @@
{
"_comment": "Auto-generated from Dockerfile ARGs. Do not edit manually. Run: python tools/generate_versions_json.py",
"variable": {
"CUDA_VERSION": {
"default": "12.9.1"
},
"PYTHON_VERSION": {
"default": "3.12"
},
"BUILD_BASE_IMAGE": {
"default": "nvidia/cuda:12.9.1-devel-ubuntu20.04"
},
"FINAL_BASE_IMAGE": {
"default": "nvidia/cuda:12.9.1-base-ubuntu22.04"
},
"GET_PIP_URL": {
"default": "https://bootstrap.pypa.io/get-pip.py"
},
"PYTORCH_CUDA_INDEX_BASE_URL": {
"default": "https://download.pytorch.org/whl"
},
"PIP_KEYRING_PROVIDER": {
"default": "disabled"
},
"UV_KEYRING_PROVIDER": {
"default": "disabled"
},
"INSTALL_KV_CONNECTORS": {
"default": "false"
},
"TORCH_CUDA_ARCH_LIST": {
"default": "7.0 7.5 8.0 8.9 9.0 10.0 12.0"
},
"MAX_JOBS": {
"default": "2"
},
"NVCC_THREADS": {
"default": "8"
},
"SCCACHE_BUCKET_NAME": {
"default": "vllm-build-sccache"
},
"SCCACHE_REGION_NAME": {
"default": "us-west-2"
},
"SCCACHE_S3_NO_CREDENTIALS": {
"default": "0"
},
"vllm_target_device": {
"default": "cuda"
},
"DEEPGEMM_GIT_REF": {
"default": "594953acce41793ae00a1233eb516044d604bcb6"
},
"PPLX_COMMIT_HASH": {
"default": "12cecfd"
},
"DEEPEP_COMMIT_HASH": {
"default": "73b6ea4"
},
"GIT_REPO_CHECK": {
"default": "0"
},
"VLLM_MAX_SIZE_MB": {
"default": "500"
},
"RUN_WHEEL_CHECK": {
"default": "true"
},
"FLASHINFER_VERSION": {
"default": "0.5.3"
},
"GDRCOPY_CUDA_VERSION": {
"default": "12.8"
},
"GDRCOPY_OS_VERSION": {
"default": "Ubuntu22_04"
},
"BITSANDBYTES_VERSION_X86": {
"default": "0.46.1"
},
"BITSANDBYTES_VERSION_ARM64": {
"default": "0.42.0"
},
"TIMM_VERSION": {
"default": ">=1.0.17"
},
"RUNAI_MODEL_STREAMER_VERSION": {
"default": ">=0.15.3"
}
}
}

139
tools/generate_versions_json.py Executable file
View File

@@ -0,0 +1,139 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Generate docker/versions.json from Dockerfile ARG defaults.
This script parses the Dockerfile and extracts ARG defaults to create
a bake-native versions.json file that can be used directly with:
docker buildx bake -f docker/docker-bake.hcl -f docker/versions.json
Usage:
python tools/generate_versions_json.py [--check]
Options:
--check Verify versions.json matches Dockerfile (for CI validation)
Requirements:
pip install dockerfile-parse
"""
import json
import sys
from pathlib import Path
from dockerfile_parse import DockerfileParser
# Repository root: the parent of the directory that contains this script.
REPO_ROOT = Path(__file__).resolve().parent.parent
# Dockerfile whose ARG defaults are extracted.
DOCKERFILE = REPO_ROOT / "docker" / "Dockerfile"
# Generated file: rewritten by default, only compared against with --check.
VERSIONS_JSON = REPO_ROOT / "docker" / "versions.json"
# Renames applied when emitting bake variables: lowercase Dockerfile ARG
# names on the left, the uppercase bake variable names on the right
# (matching the docker-bake.hcl variable naming convention).
# ARG names that are not listed here are emitted under their own name.
BAKE_VAR_NAMES = {
"torch_cuda_arch_list": "TORCH_CUDA_ARCH_LIST",
"max_jobs": "MAX_JOBS",
"nvcc_threads": "NVCC_THREADS",
}
def _resolve_references(value: str, args: dict[str, str]) -> str:
    """Expand ``${NAME}`` references in *value* using the defaults in *args*.

    Substitution is repeated until the value stops changing, so chained
    defaults (``A=${B}``, ``B=${C}``, ``C=1``) resolve regardless of the
    order in which the ARGs were declared. References to names without a
    known default are left in place for the caller to detect.
    """
    # Each pass expands one more level of nesting. Capping the number of
    # passes at len(args) guarantees termination for self-referential or
    # cyclic defaults, which can never be fully resolved anyway.
    for _ in range(len(args)):
        if "${" not in value:
            break
        before = value
        for ref_name, ref_value in args.items():
            value = value.replace(f"${{{ref_name}}}", ref_value)
        if value == before:
            # Only references without a known default remain.
            break
    return value


def parse_dockerfile_args(dockerfile_path: Path) -> dict[str, str]:
    """Extract ARG defaults from a Dockerfile using dockerfile-parse.

    Args:
        dockerfile_path: Path of the Dockerfile to read.

    Returns:
        A mapping of ARG name to default value, in declaration order.
        Only ARGs with a non-empty default are included; when a name is
        declared with a default more than once, the first default wins.
        ``${NAME}`` references to other ARG defaults are expanded, and any
        ARG whose default still contains an unresolved ``${`` reference
        afterwards is omitted.
    """
    parser = DockerfileParser(path=str(dockerfile_path))

    # Walk the parsed structure rather than scanning raw text so that ARGs
    # from every stage of a multi-stage Dockerfile are seen.
    args: dict[str, str] = {}
    for item in parser.structure:
        if item["instruction"] != "ARG":
            continue

        name, separator, default = item["value"].partition("=")
        if not separator:
            # "ARG NAME" without a default carries no version information.
            continue

        name = name.strip()
        if name in args:
            # Keep the first default seen for a given name.
            continue

        # Strip one pair of matching surrounding quotes, if present.
        default = default.strip()
        if (default.startswith('"') and default.endswith('"')) or (
            default.startswith("'") and default.endswith("'")
        ):
            default = default[1:-1]

        if default:
            args[name] = default

    # Resolve variable interpolation (e.g., ${CUDA_VERSION} -> 12.9.1).
    resolved: dict[str, str] = {}
    for name, value in args.items():
        value = _resolve_references(value, args)
        # Skip values that still reference an ARG with no default available.
        if "${" not in value:
            resolved[name] = value

    return resolved
def generate_bake_native_json(args: dict[str, str]) -> dict:
    """Wrap extracted ARG defaults in the bake-native JSON layout.

    Args:
        args: Mapping of Dockerfile ARG name to its default value.

    Returns:
        A dict with a ``_comment`` banner and a ``variable`` table in which
        every entry has the form ``{"default": value}``. Names present in
        ``BAKE_VAR_NAMES`` are emitted under their mapped name; all other
        names are kept as-is.
    """
    banner = (
        "Auto-generated from Dockerfile ARGs. "
        "Do not edit manually. Run: python tools/generate_versions_json.py"
    )
    bake_variables = {
        BAKE_VAR_NAMES.get(arg_name, arg_name): {"default": arg_default}
        for arg_name, arg_default in args.items()
    }
    return {"_comment": banner, "variable": bake_variables}
def main():
    """Regenerate docker/versions.json, or verify it when --check is given.

    Without ``--check`` the file is rewritten from the Dockerfile ARG
    defaults and a summary of the extracted values is printed. With
    ``--check`` nothing is written: the process exits with status 1 if the
    file is missing or differs from the freshly generated content, and with
    status 0 if it matches.
    """
    args = parse_dockerfile_args(DOCKERFILE)
    new_content = json.dumps(generate_bake_native_json(args), indent=2) + "\n"

    if "--check" not in sys.argv:
        # Generation mode: overwrite the file and report what was extracted.
        VERSIONS_JSON.write_text(new_content)
        print(f"✅ Generated {VERSIONS_JSON}")
        print("\nExtracted versions:")
        for arg_name, arg_default in args.items():
            print(f" {arg_name}: {arg_default}")
        return

    # Check mode: compare only, never write.
    if not VERSIONS_JSON.exists():
        print(f"ERROR: {VERSIONS_JSON} does not exist")
        sys.exit(1)

    if VERSIONS_JSON.read_text() != new_content:
        print("ERROR: docker/versions.json is out of sync with Dockerfile")
        print("Run: python tools/generate_versions_json.py")
        sys.exit(1)

    print("✅ docker/versions.json is in sync with Dockerfile")
    sys.exit(0)


if __name__ == "__main__":
    main()