Use prebuilt FlashInfer x86_64 PyTorch 2.7 CUDA 12.8 wheel for CI (#18537)

Signed-off-by: Huy Do <huydhn@gmail.com>
Author: Huy Do
Date: 2025-05-23 14:17:16 -07:00
Committed by: GitHub
Parent: 2628a69e35
Commit: 1645b60196


@@ -257,18 +257,17 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
 RUN --mount=type=cache,target=/root/.cache/uv \
     . /etc/environment && \
     if [ "$TARGETPLATFORM" != "linux/arm64" ]; then \
-        # uv pip install --system https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.4/flashinfer_python-0.2.4+cu124torch2.6-cp38-abi3-linux_x86_64.whl ; \
-        # TESTING: install FlashInfer from source to test 2.7.0 final RC
+        # FlashInfer already has a wheel for PyTorch 2.7.0 and CUDA 12.8. This is enough for CI use
         if [[ "$CUDA_VERSION" == 12.8* ]]; then \
-            export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0 10.0+PTX'; \
+            uv pip install --system https://download.pytorch.org/whl/cu128/flashinfer/flashinfer_python-0.2.5%2Bcu128torch2.7-cp38-abi3-linux_x86_64.whl; \
         else \
             export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0+PTX'; \
-        fi; \
-        CUDA_MAJOR="${CUDA_VERSION%%.*}"; \
-        if [ "$CUDA_MAJOR" -lt 12 ]; then \
-            export FLASHINFER_ENABLE_SM90=0; \
-        fi; \
-        uv pip install --system --no-build-isolation "git+https://github.com/flashinfer-ai/flashinfer@21ea1d2545f74782b91eb8c08fd503ac4c0743fc" ; \
+            CUDA_MAJOR="${CUDA_VERSION%%.*}"; \
+            if [ "$CUDA_MAJOR" -lt 12 ]; then \
+                export FLASHINFER_ENABLE_SM90=0; \
+            fi; \
+            uv pip install --system --no-build-isolation "git+https://github.com/flashinfer-ai/flashinfer@21ea1d2545f74782b91eb8c08fd503ac4c0743fc" ; \
+        fi \
     fi
 COPY examples examples
 COPY benchmarks benchmarks
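
A quick way to sanity-check the result of this change is to confirm, inside the built x86_64 image, that the prebuilt wheel was installed rather than a source build. The sketch below is not part of the commit: the image tag "vllm-ci:latest" is a placeholder for whatever the CI build produces, and the expected version strings are simply read off the wheel name above (0.2.5, torch 2.7, CUDA 12.8).

# Sketch only: verify the prebuilt FlashInfer wheel inside the built image.
# "vllm-ci:latest" is a hypothetical tag, not something this commit defines.
docker run --rm -i --entrypoint python3 vllm-ci:latest - <<'PY'
from importlib.metadata import version
import torch
print("flashinfer-python:", version("flashinfer-python"))        # expect 0.2.5 for the cu128/torch2.7 wheel
print("torch:", torch.__version__, "CUDA:", torch.version.cuda)  # expect 2.7.x and 12.8
PY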