diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index e034f75a9..e2ce09459 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -147,6 +147,13 @@ repos:
     entry: python tools/pre_commit/validate_config.py
     language: python
     additional_dependencies: [regex]
+  - id: validate-docker-versions
+    name: Validate docker/versions.json matches Dockerfile
+    entry: python tools/generate_versions_json.py --check
+    language: python
+    files: ^(docker/(Dockerfile|versions\.json)|tools/generate_versions_json\.py)$
+    pass_filenames: false
+    additional_dependencies: [dockerfile-parse]
   # Keep `suggestion` last
   - id: suggestion
     name: Suggestion
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 7ac13e771..d4ecf96b1 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -5,6 +5,23 @@
 # docs/contributing/dockerfile/dockerfile.md and
 # docs/assets/contributing/dockerfile-stages-dependency.png
 
+# =============================================================================
+# VERSION MANAGEMENT
+# =============================================================================
+# ARG defaults in this Dockerfile are the source of truth for pinned versions.
+# docker/versions.json is auto-generated for use with docker buildx bake.
+#
+# When updating versions:
+# 1. Edit the ARG defaults below
+# 2. Run: python tools/generate_versions_json.py
+#
+# To query versions programmatically:
+#   jq -r '.variable.CUDA_VERSION.default' docker/versions.json
+#
+# To build with bake:
+#   docker buildx bake -f docker/docker-bake.hcl -f docker/versions.json
+# =============================================================================
+
 ARG CUDA_VERSION=12.9.1
 ARG PYTHON_VERSION=3.12
 
@@ -141,6 +158,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # CUDA arch list used by torch
 # Explicitly set the list to avoid issues with torch 2.2
 # See https://github.com/pytorch/pytorch/pull/123243
+# Exported to versions.json as .variable.TORCH_CUDA_ARCH_LIST.default
 ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0 12.0'
 ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
 #################### BUILD BASE IMAGE ####################
@@ -256,7 +274,8 @@ ENV UV_LINK_MODE=copy
 WORKDIR /workspace
 
 # Build DeepGEMM wheel
-ARG DEEPGEMM_GIT_REF
+# Default moved here from tools/install_deepgemm.sh for centralized version management
+ARG DEEPGEMM_GIT_REF=594953acce41793ae00a1233eb516044d604bcb6
 COPY tools/install_deepgemm.sh /tmp/install_deepgemm.sh
 RUN --mount=type=cache,target=/root/.cache/uv \
     mkdir -p /tmp/deepgemm/dist && \
@@ -271,8 +290,9 @@ RUN mkdir -p /tmp/deepgemm/dist && touch /tmp/deepgemm/dist/.deepgemm_skipped
 
 # Build pplx-kernels and DeepEP wheels
 COPY tools/ep_kernels/install_python_libraries.sh /tmp/install_python_libraries.sh
-ARG PPLX_COMMIT_HASH
-ARG DEEPEP_COMMIT_HASH
+# Defaults moved here from tools/ep_kernels/install_python_libraries.sh for centralized version management
+ARG PPLX_COMMIT_HASH=12cecfd
+ARG DEEPEP_COMMIT_HASH=73b6ea4
 ARG NVSHMEM_VER
 RUN --mount=type=cache,target=/root/.cache/uv \
     mkdir -p /tmp/ep_kernels_workspace/dist && \
@@ -474,6 +494,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # Install FlashInfer pre-compiled kernel cache and binaries
 # This is ~1.1GB and only changes when FlashInfer version bumps
 # https://docs.flashinfer.ai/installation.html
+# Exported to versions.json as .variable.FLASHINFER_VERSION.default
 ARG FLASHINFER_VERSION=0.5.3
 RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install --system flashinfer-cubin==${FLASHINFER_VERSION} \
@@ -503,14 +524,20 @@ RUN set -eux; \
 
 # Install vllm-openai dependencies (saves ~2.6s per build)
 # These are stable packages that don't depend on vLLM itself
+# Exported to versions.json under .variable: BITSANDBYTES_VERSION_X86,
+# BITSANDBYTES_VERSION_ARM64, TIMM_VERSION, RUNAI_MODEL_STREAMER_VERSION
+ARG BITSANDBYTES_VERSION_X86=0.46.1
+ARG BITSANDBYTES_VERSION_ARM64=0.42.0
+ARG TIMM_VERSION=">=1.0.17"
+ARG RUNAI_MODEL_STREAMER_VERSION=">=0.15.3"
 RUN --mount=type=cache,target=/root/.cache/uv \
     if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
-        BITSANDBYTES_VERSION="0.42.0"; \
+        BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_ARM64}"; \
     else \
-        BITSANDBYTES_VERSION="0.46.1"; \
+        BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_X86}"; \
     fi; \
     uv pip install --system accelerate hf_transfer modelscope \
-        "bitsandbytes>=${BITSANDBYTES_VERSION}" 'timm>=1.0.17' 'runai-model-streamer[s3,gcs]>=0.15.3'
+        "bitsandbytes>=${BITSANDBYTES_VERSION}" "timm${TIMM_VERSION}" "runai-model-streamer[s3,gcs]${RUNAI_MODEL_STREAMER_VERSION}"
 
 # ============================================================
 # VLLM INSTALLATION (depends on build stage)
diff --git a/docker/versions.json b/docker/versions.json
new file mode 100644
index 000000000..045955bc4
--- /dev/null
+++ b/docker/versions.json
@@ -0,0 +1,92 @@
+{
+  "_comment": "Auto-generated from Dockerfile ARGs. Do not edit manually. Run: python tools/generate_versions_json.py",
+  "variable": {
+    "CUDA_VERSION": {
+      "default": "12.9.1"
+    },
+    "PYTHON_VERSION": {
+      "default": "3.12"
+    },
+    "BUILD_BASE_IMAGE": {
+      "default": "nvidia/cuda:12.9.1-devel-ubuntu20.04"
+    },
+    "FINAL_BASE_IMAGE": {
+      "default": "nvidia/cuda:12.9.1-base-ubuntu22.04"
+    },
+    "GET_PIP_URL": {
+      "default": "https://bootstrap.pypa.io/get-pip.py"
+    },
+    "PYTORCH_CUDA_INDEX_BASE_URL": {
+      "default": "https://download.pytorch.org/whl"
+    },
+    "PIP_KEYRING_PROVIDER": {
+      "default": "disabled"
+    },
+    "UV_KEYRING_PROVIDER": {
+      "default": "disabled"
+    },
+    "INSTALL_KV_CONNECTORS": {
+      "default": "false"
+    },
+    "TORCH_CUDA_ARCH_LIST": {
+      "default": "7.0 7.5 8.0 8.9 9.0 10.0 12.0"
+    },
+    "MAX_JOBS": {
+      "default": "2"
+    },
+    "NVCC_THREADS": {
+      "default": "8"
+    },
+    "SCCACHE_BUCKET_NAME": {
+      "default": "vllm-build-sccache"
+    },
+    "SCCACHE_REGION_NAME": {
+      "default": "us-west-2"
+    },
+    "SCCACHE_S3_NO_CREDENTIALS": {
+      "default": "0"
+    },
+    "vllm_target_device": {
+      "default": "cuda"
+    },
+    "DEEPGEMM_GIT_REF": {
+      "default": "594953acce41793ae00a1233eb516044d604bcb6"
+    },
+    "PPLX_COMMIT_HASH": {
+      "default": "12cecfd"
+    },
+    "DEEPEP_COMMIT_HASH": {
+      "default": "73b6ea4"
+    },
+    "GIT_REPO_CHECK": {
+      "default": "0"
+    },
+    "VLLM_MAX_SIZE_MB": {
+      "default": "500"
+    },
+    "RUN_WHEEL_CHECK": {
+      "default": "true"
+    },
+    "FLASHINFER_VERSION": {
+      "default": "0.5.3"
+    },
+    "GDRCOPY_CUDA_VERSION": {
+      "default": "12.8"
+    },
+    "GDRCOPY_OS_VERSION": {
+      "default": "Ubuntu22_04"
+    },
+    "BITSANDBYTES_VERSION_X86": {
+      "default": "0.46.1"
+    },
+    "BITSANDBYTES_VERSION_ARM64": {
+      "default": "0.42.0"
+    },
+    "TIMM_VERSION": {
+      "default": ">=1.0.17"
+    },
+    "RUNAI_MODEL_STREAMER_VERSION": {
+      "default": ">=0.15.3"
+    }
+  }
+}
diff --git a/tools/generate_versions_json.py b/tools/generate_versions_json.py
new file mode 100755
index 000000000..f5d2893a9
--- /dev/null
+++ b/tools/generate_versions_json.py
@@ -0,0 +1,139 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Generate docker/versions.json from Dockerfile ARG defaults.
+
+This script parses the Dockerfile and extracts ARG defaults to create
+a bake-native versions.json file that can be used directly with:
+    docker buildx bake -f docker/docker-bake.hcl -f docker/versions.json
+
+Usage:
+    python tools/generate_versions_json.py [--check]
+
+Options:
+    --check    Verify versions.json matches Dockerfile (for CI validation)
+
+Requirements:
+    pip install dockerfile-parse
+"""
+
+import json
+import sys
+from pathlib import Path
+
+from dockerfile_parse import DockerfileParser
+
+REPO_ROOT = Path(__file__).resolve().parent.parent
+DOCKERFILE = REPO_ROOT / "docker" / "Dockerfile"
+VERSIONS_JSON = REPO_ROOT / "docker" / "versions.json"
+
+# Map lowercase Dockerfile ARG names to uppercase bake variable names, matching
+# the docker-bake.hcl naming convention. Unlisted ARG names pass through as-is.
+BAKE_VAR_NAMES = {
+    "torch_cuda_arch_list": "TORCH_CUDA_ARCH_LIST",
+    "max_jobs": "MAX_JOBS",
+    "nvcc_threads": "NVCC_THREADS",
+}
+
+
+def parse_dockerfile_args(dockerfile_path: Path) -> dict[str, str]:
+    """Extract all ARG defaults from Dockerfile using dockerfile-parse."""
+    parser = DockerfileParser(path=str(dockerfile_path))
+
+    # Extract ARGs from structure (more reliable for multi-stage Dockerfiles)
+    args = {}
+    for item in parser.structure:
+        if item["instruction"] != "ARG":
+            continue
+
+        value = item["value"]
+        if "=" not in value:
+            continue
+
+        # Parse ARG NAME=value (handle quotes)
+        name, _, default = value.partition("=")
+        name = name.strip()
+
+        if name in args:
+            # Keep first occurrence
+            continue
+
+        # Strip surrounding quotes if present
+        default = default.strip()
+        if (default.startswith('"') and default.endswith('"')) or (
+            default.startswith("'") and default.endswith("'")
+        ):
+            default = default[1:-1]
+
+        if default:
+            args[name] = default
+
+    # Resolve variable interpolation (e.g., ${CUDA_VERSION} -> 12.9.1)
+    resolved = {}
+    for name, value in args.items():
+        if "${" in value:
+            # Substitute ${VAR} references with their values
+            for ref_name, ref_value in args.items():
+                value = value.replace(f"${{{ref_name}}}", ref_value)
+        # Skip if still has unresolved references (no default available)
+        if "${" not in value:
+            resolved[name] = value
+
+    return resolved
+
+
+def generate_bake_native_json(args: dict[str, str]) -> dict:
+    """Generate bake-native JSON structure."""
+    variables = {}
+    for name, value in args.items():
+        # Use uppercase bake variable name if mapped, otherwise keep as-is
+        bake_name = BAKE_VAR_NAMES.get(name, name)
+        variables[bake_name] = {"default": value}
+
+    return {
+        "_comment": (
+            "Auto-generated from Dockerfile ARGs. "
+            "Do not edit manually. Run: python tools/generate_versions_json.py"
+        ),
+        "variable": variables,
+    }
+
+
+def main():
+    check_mode = "--check" in sys.argv
+
+    # Parse Dockerfile
+    args = parse_dockerfile_args(DOCKERFILE)
+
+    # Generate bake-native JSON
+    data = generate_bake_native_json(args)
+    new_content = json.dumps(data, indent=2) + "\n"
+
+    if check_mode:
+        # Verify existing file matches
+        if not VERSIONS_JSON.exists():
+            print(f"ERROR: {VERSIONS_JSON} does not exist")
+            sys.exit(1)
+
+        existing_content = VERSIONS_JSON.read_text()
+        if existing_content != new_content:
+            print("ERROR: docker/versions.json is out of sync with Dockerfile")
+            print("Run: python tools/generate_versions_json.py")
+            sys.exit(1)
+
+        print("✅ docker/versions.json is in sync with Dockerfile")
+        sys.exit(0)
+
+    # Write versions.json
+    VERSIONS_JSON.write_text(new_content)
+    print(f"✅ Generated {VERSIONS_JSON}")
+
+    # Print summary
+    print("\nExtracted versions:")
+    for name, value in args.items():
+        print(f"  {name}: {value}")
+
+
+if __name__ == "__main__":
+    main()