diff --git a/docker/Dockerfile b/docker/Dockerfile index 1b937bbc1..e03b9989a 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -20,8 +20,8 @@ ARG PYTHON_VERSION=3.12 # glibc version is baked into the distro, and binaries built with one glibc # version are not backwards compatible with OSes that use an earlier version. ARG BUILD_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04 -# TODO: Restore to base image after FlashInfer AOT wheel fixed -ARG FINAL_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 +# Using cuda base image with minimal dependencies necessary for JIT compilation (FlashInfer, DeepGEMM, EP kernels) +ARG FINAL_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-base-ubuntu22.04 # By parameterizing the Deadsnakes repository URL, we allow third-party to use # their own mirror. When doing so, we don't benefit from the transparent @@ -328,6 +328,18 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \ && curl -sS ${GET_PIP_URL} | python${PYTHON_VERSION} \ && python3 --version && python3 -m pip --version +# Install CUDA development tools and build essentials for runtime JIT compilation +# (FlashInfer, DeepGEMM, EP kernels all require compilation at runtime) +RUN CUDA_VERSION_DASH=$(echo $CUDA_VERSION | cut -d. -f1,2 | tr '.' '-') && \ + apt-get update -y && \ + apt-get install -y --no-install-recommends \ + cuda-nvcc-${CUDA_VERSION_DASH} \ + cuda-cudart-${CUDA_VERSION_DASH} \ + cuda-nvrtc-${CUDA_VERSION_DASH} \ + cuda-cuobjdump-${CUDA_VERSION_DASH} \ + libcublas-${CUDA_VERSION_DASH} && \ + rm -rf /var/lib/apt/lists/* + ARG PIP_INDEX_URL UV_INDEX_URL ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL ARG PYTORCH_CUDA_INDEX_BASE_URL diff --git a/docs/assets/contributing/dockerfile-stages-dependency.png b/docs/assets/contributing/dockerfile-stages-dependency.png index 57a33524a..b327eb215 100644 Binary files a/docs/assets/contributing/dockerfile-stages-dependency.png and b/docs/assets/contributing/dockerfile-stages-dependency.png differ