Use prebuilt FlashInfer x86_64 PyTorch 2.7 CUDA 12.8 wheel for CI (#18537)

Signed-off-by: Huy Do <huydhn@gmail.com>
Author: Huy Do
Date: 2025-05-23 14:17:16 -07:00
Committed by: GitHub
Parent: 2628a69e35
Commit: 1645b60196


@@ -257,18 +257,17 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
 RUN --mount=type=cache,target=/root/.cache/uv \
     . /etc/environment && \
     if [ "$TARGETPLATFORM" != "linux/arm64" ]; then \
-        # uv pip install --system https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.4/flashinfer_python-0.2.4+cu124torch2.6-cp38-abi3-linux_x86_64.whl ; \
-        # TESTING: install FlashInfer from source to test 2.7.0 final RC
+        # FlashInfer already has a wheel for PyTorch 2.7.0 and CUDA 12.8. This is enough for CI use
         if [[ "$CUDA_VERSION" == 12.8* ]]; then \
-            export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0 10.0+PTX'; \
+            uv pip install --system https://download.pytorch.org/whl/cu128/flashinfer/flashinfer_python-0.2.5%2Bcu128torch2.7-cp38-abi3-linux_x86_64.whl; \
         else \
             export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0+PTX'; \
-        fi; \
-        CUDA_MAJOR="${CUDA_VERSION%%.*}"; \
-        if [ "$CUDA_MAJOR" -lt 12 ]; then \
-            export FLASHINFER_ENABLE_SM90=0; \
-        fi; \
-        uv pip install --system --no-build-isolation "git+https://github.com/flashinfer-ai/flashinfer@21ea1d2545f74782b91eb8c08fd503ac4c0743fc" ; \
+            CUDA_MAJOR="${CUDA_VERSION%%.*}"; \
+            if [ "$CUDA_MAJOR" -lt 12 ]; then \
+                export FLASHINFER_ENABLE_SM90=0; \
+            fi; \
+            uv pip install --system --no-build-isolation "git+https://github.com/flashinfer-ai/flashinfer@21ea1d2545f74782b91eb8c08fd503ac4c0743fc" ; \
+        fi \
     fi
 COPY examples examples
 COPY benchmarks benchmarks
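
A quick way to sanity-check the result of this change is to confirm, inside the built x86_64 image, that the prebuilt wheel was installed rather than a source build. The sketch below is not part of the commit: the image tag "vllm-ci:latest" is a placeholder for whatever the CI build produces, and the expected version strings are simply read off the wheel name above (0.2.5, torch 2.7, CUDA 12.8).

# Sketch only: verify the prebuilt FlashInfer wheel inside the built image.
# "vllm-ci:latest" is a hypothetical tag, not something this commit defines.
docker run --rm -i --entrypoint python3 vllm-ci:latest - <<'PY'
from importlib.metadata import version
import torch
print("flashinfer-python:", version("flashinfer-python"))        # expect 0.2.5 for the cu128/torch2.7 wheel
print("torch:", torch.__version__, "CUDA:", torch.version.cuda)  # expect 2.7.x and 12.8
PY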