diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e034f75a9..e2ce09459 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -147,6 +147,13 @@ repos: entry: python tools/pre_commit/validate_config.py language: python additional_dependencies: [regex] + - id: validate-docker-versions + name: Validate docker/versions.json matches Dockerfile + entry: python tools/generate_versions_json.py --check + language: python + files: ^docker/(Dockerfile|versions\.json)$ + pass_filenames: false + additional_dependencies: [dockerfile-parse] # Keep `suggestion` last - id: suggestion name: Suggestion diff --git a/docker/Dockerfile b/docker/Dockerfile index 7ac13e771..d4ecf96b1 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -5,6 +5,23 @@ # docs/contributing/dockerfile/dockerfile.md and # docs/assets/contributing/dockerfile-stages-dependency.png +# ============================================================================= +# VERSION MANAGEMENT +# ============================================================================= +# ARG defaults in this Dockerfile are the source of truth for pinned versions. +# docker/versions.json is auto-generated for use with docker buildx bake. +# +# When updating versions: +# 1. Edit the ARG defaults below +# 2. 
Run: python tools/generate_versions_json.py +# +# To query versions programmatically: +# jq -r '.variable.CUDA_VERSION.default' docker/versions.json +# +# To build with bake: +# docker buildx bake -f docker/docker-bake.hcl -f docker/versions.json +# ============================================================================= + ARG CUDA_VERSION=12.9.1 ARG PYTHON_VERSION=3.12 @@ -141,6 +158,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ # CUDA arch list used by torch # Explicitly set the list to avoid issues with torch 2.2 # See https://github.com/pytorch/pytorch/pull/123243 +# From versions.json: .variable.TORCH_CUDA_ARCH_LIST.default ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0 12.0' ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list} #################### BUILD BASE IMAGE #################### @@ -256,7 +274,8 @@ ENV UV_LINK_MODE=copy WORKDIR /workspace # Build DeepGEMM wheel -ARG DEEPGEMM_GIT_REF +# Default moved here from tools/install_deepgemm.sh for centralized version management +ARG DEEPGEMM_GIT_REF=594953acce41793ae00a1233eb516044d604bcb6 COPY tools/install_deepgemm.sh /tmp/install_deepgemm.sh RUN --mount=type=cache,target=/root/.cache/uv \ mkdir -p /tmp/deepgemm/dist && \ @@ -271,8 +290,9 @@ RUN mkdir -p /tmp/deepgemm/dist && touch /tmp/deepgemm/dist/.deepgemm_skipped # Build pplx-kernels and DeepEP wheels COPY tools/ep_kernels/install_python_libraries.sh /tmp/install_python_libraries.sh -ARG PPLX_COMMIT_HASH -ARG DEEPEP_COMMIT_HASH +# Defaults moved here from tools/ep_kernels/install_python_libraries.sh for centralized version management +ARG PPLX_COMMIT_HASH=12cecfd +ARG DEEPEP_COMMIT_HASH=73b6ea4 ARG NVSHMEM_VER RUN --mount=type=cache,target=/root/.cache/uv \ mkdir -p /tmp/ep_kernels_workspace/dist && \ @@ -474,6 +494,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ # Install FlashInfer pre-compiled kernel cache and binaries # This is ~1.1GB and only changes when FlashInfer version bumps # https://docs.flashinfer.ai/installation.html +# From versions.json: 
.variable.FLASHINFER_VERSION.default ARG FLASHINFER_VERSION=0.5.3 RUN --mount=type=cache,target=/root/.cache/uv \ uv pip install --system flashinfer-cubin==${FLASHINFER_VERSION} \ @@ -503,14 +524,20 @@ RUN set -eux; \ # Install vllm-openai dependencies (saves ~2.6s per build) # These are stable packages that don't depend on vLLM itself +# From versions.json: .variable.BITSANDBYTES_VERSION_X86.default, .variable.BITSANDBYTES_VERSION_ARM64.default +# From versions.json: .variable.TIMM_VERSION.default, .variable.RUNAI_MODEL_STREAMER_VERSION.default +ARG BITSANDBYTES_VERSION_X86=0.46.1 +ARG BITSANDBYTES_VERSION_ARM64=0.42.0 +ARG TIMM_VERSION=">=1.0.17" +ARG RUNAI_MODEL_STREAMER_VERSION=">=0.15.3" RUN --mount=type=cache,target=/root/.cache/uv \ if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \ - BITSANDBYTES_VERSION="0.42.0"; \ + BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_ARM64}"; \ else \ - BITSANDBYTES_VERSION="0.46.1"; \ + BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_X86}"; \ fi; \ uv pip install --system accelerate hf_transfer modelscope \ - "bitsandbytes>=${BITSANDBYTES_VERSION}" 'timm>=1.0.17' 'runai-model-streamer[s3,gcs]>=0.15.3' + "bitsandbytes>=${BITSANDBYTES_VERSION}" "timm${TIMM_VERSION}" "runai-model-streamer[s3,gcs]${RUNAI_MODEL_STREAMER_VERSION}" # ============================================================ # VLLM INSTALLATION (depends on build stage) diff --git a/docker/versions.json b/docker/versions.json new file mode 100644 index 000000000..045955bc4 --- /dev/null +++ b/docker/versions.json @@ -0,0 +1,92 @@ +{ + "_comment": "Auto-generated from Dockerfile ARGs. Do not edit manually. 
Run: python tools/generate_versions_json.py", + "variable": { + "CUDA_VERSION": { + "default": "12.9.1" + }, + "PYTHON_VERSION": { + "default": "3.12" + }, + "BUILD_BASE_IMAGE": { + "default": "nvidia/cuda:12.9.1-devel-ubuntu20.04" + }, + "FINAL_BASE_IMAGE": { + "default": "nvidia/cuda:12.9.1-base-ubuntu22.04" + }, + "GET_PIP_URL": { + "default": "https://bootstrap.pypa.io/get-pip.py" + }, + "PYTORCH_CUDA_INDEX_BASE_URL": { + "default": "https://download.pytorch.org/whl" + }, + "PIP_KEYRING_PROVIDER": { + "default": "disabled" + }, + "UV_KEYRING_PROVIDER": { + "default": "disabled" + }, + "INSTALL_KV_CONNECTORS": { + "default": "false" + }, + "TORCH_CUDA_ARCH_LIST": { + "default": "7.0 7.5 8.0 8.9 9.0 10.0 12.0" + }, + "MAX_JOBS": { + "default": "2" + }, + "NVCC_THREADS": { + "default": "8" + }, + "SCCACHE_BUCKET_NAME": { + "default": "vllm-build-sccache" + }, + "SCCACHE_REGION_NAME": { + "default": "us-west-2" + }, + "SCCACHE_S3_NO_CREDENTIALS": { + "default": "0" + }, + "vllm_target_device": { + "default": "cuda" + }, + "DEEPGEMM_GIT_REF": { + "default": "594953acce41793ae00a1233eb516044d604bcb6" + }, + "PPLX_COMMIT_HASH": { + "default": "12cecfd" + }, + "DEEPEP_COMMIT_HASH": { + "default": "73b6ea4" + }, + "GIT_REPO_CHECK": { + "default": "0" + }, + "VLLM_MAX_SIZE_MB": { + "default": "500" + }, + "RUN_WHEEL_CHECK": { + "default": "true" + }, + "FLASHINFER_VERSION": { + "default": "0.5.3" + }, + "GDRCOPY_CUDA_VERSION": { + "default": "12.8" + }, + "GDRCOPY_OS_VERSION": { + "default": "Ubuntu22_04" + }, + "BITSANDBYTES_VERSION_X86": { + "default": "0.46.1" + }, + "BITSANDBYTES_VERSION_ARM64": { + "default": "0.42.0" + }, + "TIMM_VERSION": { + "default": ">=1.0.17" + }, + "RUNAI_MODEL_STREAMER_VERSION": { + "default": ">=0.15.3" + } + } +} diff --git a/tools/generate_versions_json.py b/tools/generate_versions_json.py new file mode 100755 index 000000000..f5d2893a9 --- /dev/null +++ b/tools/generate_versions_json.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +# 
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Generate docker/versions.json from Dockerfile ARG defaults.

This script parses the Dockerfile and extracts ARG defaults to create
a bake-native versions.json file that can be used directly with:
    docker buildx bake -f docker/docker-bake.hcl -f docker/versions.json

Usage:
    python tools/generate_versions_json.py [--check]

Options:
    --check    Verify versions.json matches Dockerfile (for CI validation)

Requirements:
    pip install dockerfile-parse
"""

import json
import re
import sys
from pathlib import Path

REPO_ROOT = Path(__file__).resolve().parent.parent
DOCKERFILE = REPO_ROOT / "docker" / "Dockerfile"
VERSIONS_JSON = REPO_ROOT / "docker" / "versions.json"

# Map Dockerfile ARG names (lowercase) to bake variable names (uppercase)
# This matches docker-bake.hcl variable naming convention
BAKE_VAR_NAMES = {
    "torch_cuda_arch_list": "TORCH_CUDA_ARCH_LIST",
    "max_jobs": "MAX_JOBS",
    "nvcc_threads": "NVCC_THREADS",
}

# Matches the two plain reference spellings, ``${NAME}`` and ``$NAME``.
# Modifier forms such as ``${NAME:-fallback}`` intentionally do not match and
# are therefore treated as unresolved.
_REFERENCE = re.compile(
    r"\$\{([A-Za-z_][A-Za-z0-9_]*)\}|\$([A-Za-z_][A-Za-z0-9_]*)"
)


def _strip_quotes(text: str) -> str:
    """Remove one pair of matching surrounding quotes, if present."""
    # Require two characters so a lone quote is not "unwrapped" into "".
    if len(text) >= 2 and text[0] == text[-1] and text[0] in "\"'":
        return text[1:-1]
    return text


def _has_reference(value: str) -> bool:
    """Return True if *value* still contains a variable reference."""
    return "${" in value or _REFERENCE.search(value) is not None


def _collect_arg_defaults(instructions) -> dict[str, str]:
    """Collect ``ARG NAME=default`` pairs from parsed Dockerfile instructions.

    Args:
        instructions: Iterable of mappings with ``"instruction"`` and
            ``"value"`` keys (the shape of ``DockerfileParser.structure``).

    Returns:
        Mapping of ARG name to its raw (uninterpolated) default, in
        declaration order. ARGs without ``=`` or with an empty default are
        ignored; for a repeated name the first declaration that carries a
        non-empty default wins.
    """
    defaults: dict[str, str] = {}
    for item in instructions:
        if item["instruction"] != "ARG":
            continue

        name, separator, default = item["value"].partition("=")
        if not separator:
            # Bare `ARG NAME` declares the variable without a default.
            continue

        name = name.strip()
        if name in defaults:
            continue

        default = _strip_quotes(default.strip())
        if default:
            defaults[name] = default
    return defaults


def resolve_arg_references(args: dict[str, str]) -> dict[str, str]:
    """Expand ``${VAR}`` / ``$VAR`` references between ARG defaults.

    Resolution runs to a fixed point, so chains (``A=${B}``, ``B=${C}``)
    resolve regardless of declaration order. Entries that still contain a
    reference afterwards (unknown variable, cycle, or an unsupported
    ``${VAR:-x}`` form) are omitted.

    Args:
        args: Mapping of ARG name to raw default value.

    Returns:
        Mapping of ARG name to fully expanded value, preserving the key
        order of *args*.
    """
    settled = {
        name: value for name, value in args.items()
        if not _has_reference(value)
    }
    pending = {
        name: value for name, value in args.items() if name not in settled
    }

    def substitute(match: re.Match) -> str:
        # Only substitute fully resolved values; leave anything else intact
        # so it is retried on the next pass (or reported as unresolved).
        return settled.get(match.group(1) or match.group(2), match.group(0))

    progressed = True
    while pending and progressed:
        progressed = False
        for name, raw in list(pending.items()):
            expanded = _REFERENCE.sub(substitute, raw)
            if not _has_reference(expanded):
                settled[name] = expanded
                del pending[name]
                progressed = True

    # Re-key in the caller's order: the JSON output is compared textually in
    # --check mode, so ordering must follow the Dockerfile.
    return {name: settled[name] for name in args if name in settled}


def parse_dockerfile_args(dockerfile_path: Path) -> dict[str, str]:
    """Extract all ARG defaults from Dockerfile using dockerfile-parse.

    Args:
        dockerfile_path: Path to the Dockerfile to parse.

    Returns:
        Mapping of ARG name to its default value with variable references
        expanded, in declaration order. ARGs whose default is missing,
        empty, or cannot be fully resolved are omitted.

    Raises:
        ImportError: If the ``dockerfile-parse`` package is not installed.
    """
    # Imported here so the pure helpers in this module remain importable
    # without the third-party dependency.
    from dockerfile_parse import DockerfileParser

    parser = DockerfileParser(path=str(dockerfile_path))
    # `structure` lists every instruction, which covers ARGs in all stages.
    return resolve_arg_references(_collect_arg_defaults(parser.structure))


def generate_bake_native_json(args: dict[str, str]) -> dict:
    """Generate bake-native JSON structure.

    Args:
        args: Mapping of Dockerfile ARG name to resolved default value.

    Returns:
        Dict with a ``"_comment"`` banner and a ``"variable"`` mapping of
        bake variable name to ``{"default": value}``. Names listed in
        ``BAKE_VAR_NAMES`` are renamed; all others are kept as-is.
    """
    variables = {
        BAKE_VAR_NAMES.get(name, name): {"default": value}
        for name, value in args.items()
    }
    return {
        "_comment": (
            "Auto-generated from Dockerfile ARGs. "
            "Do not edit manually. Run: python tools/generate_versions_json.py"
        ),
        "variable": variables,
    }


def main() -> None:
    """Regenerate docker/versions.json, or verify it when ``--check`` is given.

    In check mode the process exits with status 1 if the file is missing or
    differs from what would be generated, and 0 otherwise. Without
    ``--check`` the file is (re)written and a summary is printed.
    """
    check_mode = "--check" in sys.argv

    args = parse_dockerfile_args(DOCKERFILE)
    data = generate_bake_native_json(args)
    new_content = json.dumps(data, indent=2) + "\n"

    if check_mode:
        if not VERSIONS_JSON.exists():
            print(f"ERROR: {VERSIONS_JSON} does not exist")
            sys.exit(1)

        existing_content = VERSIONS_JSON.read_text(encoding="utf-8")
        if existing_content != new_content:
            print("ERROR: docker/versions.json is out of sync with Dockerfile")
            print("Run: python tools/generate_versions_json.py")
            sys.exit(1)

        print("✅ docker/versions.json is in sync with Dockerfile")
        sys.exit(0)

    VERSIONS_JSON.write_text(new_content, encoding="utf-8")
    print(f"✅ Generated {VERSIONS_JSON}")

    print("\nExtracted versions:")
    for name, value in args.items():
        print(f"  {name}: {value}")


if __name__ == "__main__":
    main()