[CI] Fix FlashInfer AOT in release docker image (#25730)

Signed-off-by: mgoin <mgoin64@gmail.com> Signed-off-by: simon-mo <simon.mo@hey.com>
2025-09-26 17:11:40 -04:00
parent c2fa2d4dc9
commit 5aa5811a16
2 changed files with 4 additions and 1 deletions
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -404,6 +404,9 @@ RUN --mount=type=cache,target=/root/.cache/uv bash - <<'BASH'
                FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0a 10.0a 12.0"
            fi
            echo "🏗️  Installing FlashInfer with AOT compilation for arches: ${FI_TORCH_CUDA_ARCH_LIST}"
+            export FLASHINFER_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}"
+            # HACK: flashinfer.aot runs before flashinfer itself is installed, so its dependencies are installed by hand here. TODO: get them from the flashinfer package in the future.
+            uv pip install --system cuda-python==$(echo $CUDA_VERSION | cut -d. -f1,2) pynvml==$(echo $CUDA_VERSION | cut -d. -f1) nvidia-nvshmem-cu$(echo $CUDA_VERSION | cut -d. -f1)
            # Build AOT kernels
            TORCH_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}" \
                python3 -m flashinfer.aot