Restore CUDA 13.0.1 + patch vLLM for cuMemcpyBatchAsync API change

CUDA 13 removed the fail_idx parameter from cuMemcpyBatchAsync.
Patch cache_kernels.cu to match new API signature instead of downgrading.

- Restore CUDA 13.0.1, PyTorch 2.9.0+cu130, flashinfer cu130
- Patch: remove fail_idx variable and parameter from cuMemcpyBatchAsync call
- Simplify error message to not reference fail_idx
2026-04-03 07:53:12 +00:00
parent 6255c94359
commit 360b0dea58


@@ -81,7 +81,7 @@ RUN mkdir -p /wheels && \
 FROM build-base AS build-flashinfer
 ARG FLASHINFER_ENABLE_AOT=1
 ARG FLASHINFER_REF=v0.6.6
-ARG FLASHINFER_BUILD_SUFFIX=cu128
+ARG FLASHINFER_BUILD_SUFFIX=cu130
 ENV FLASHINFER_LOCAL_VERSION=${FLASHINFER_BUILD_SUFFIX:-}
 RUN git clone https://github.com/flashinfer-ai/flashinfer.git
 RUN cd flashinfer && \
@@ -134,6 +134,9 @@ RUN cd vllm && \
 git submodule sync && \
 git submodule update --init --recursive -j 8 && \
 sed -i 's/GIT_TAG [a-f0-9]\{40\}/GIT_TAG main/' cmake/external_projects/vllm_flash_attn.cmake && \
+sed -i '/size_t fail_idx = 0;/d' csrc/cache_kernels.cu && \
+sed -i 's/, \&fail_idx,/,/' csrc/cache_kernels.cu && \
+sed -i 's/"cuMemcpyBatchAsync failed at index ",\s*fail_idx, " with error "/"cuMemcpyBatchAsync failed with error "/' csrc/cache_kernels.cu && \
 export MAX_JOBS=8 && \
 export CMAKE_BUILD_PARALLEL_LEVEL=$MAX_JOBS && \
 python use_existing_torch.py && \
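The three added sed patches can be sanity-checked in isolation. The sketch below applies them to a hypothetical stand-in for the call site (the real csrc/cache_kernels.cu differs; the variable names dsts, srcs, sizes, num_ops, attrs, attr_idxs, num_attrs, and stream are assumptions for illustration), showing that fail_idx disappears from the declaration, the argument list, and the TORCH_CHECK message:

```shell
# Hypothetical stand-in for the CUDA-12-era cuMemcpyBatchAsync call site.
cat > cache_kernels.cu <<'EOF'
  size_t fail_idx = 0;
  CUresult result = cuMemcpyBatchAsync(dsts, srcs, sizes, num_ops, &attrs, &attr_idxs, num_attrs, &fail_idx, stream);
  TORCH_CHECK(result == CUDA_SUCCESS, "cuMemcpyBatchAsync failed at index ", fail_idx, " with error ", result);
EOF

# The three patches from the Dockerfile, verbatim (paths shortened):
# drop the fail_idx declaration, remove the &fail_idx argument, and
# simplify the error message so it no longer prints fail_idx.
sed -i '/size_t fail_idx = 0;/d' cache_kernels.cu
sed -i 's/, \&fail_idx,/,/' cache_kernels.cu
sed -i 's/"cuMemcpyBatchAsync failed at index ",\s*fail_idx, " with error "/"cuMemcpyBatchAsync failed with error "/' cache_kernels.cu

cat cache_kernels.cu
```

Note that the second sed relies on `&fail_idx` being followed by another argument (the stream), and the third uses the GNU sed `\s*` extension, so this assumes GNU sed as found in the Linux build image.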