Patch for vllm/Dockerfile.
Changes the default of the FLASHINFER_BUILD_SUFFIX build argument from cu128 to cu130.
Adds three sed edits to csrc/cache_kernels.cu, run after the vllm_flash_attn GIT_TAG
rewrite: they delete the "size_t fail_idx = 0;" line, replace ", &fail_idx," with ",",
and rewrite the "cuMemcpyBatchAsync failed at index ..." message so that it no longer
references fail_idx.
NOTE(review): the leading indentation of the Dockerfile continuation lines below was
reconstructed (4 spaces assumed); confirm against vllm/Dockerfile, or apply with
whitespace-insensitive matching (e.g. git apply --ignore-whitespace).

diff --git a/vllm/Dockerfile b/vllm/Dockerfile
index fb5015e..fca4f89 100644
--- a/vllm/Dockerfile
+++ b/vllm/Dockerfile
@@ -81,7 +81,7 @@ RUN mkdir -p /wheels && \
 FROM build-base AS build-flashinfer
 ARG FLASHINFER_ENABLE_AOT=1
 ARG FLASHINFER_REF=v0.6.6
-ARG FLASHINFER_BUILD_SUFFIX=cu128
+ARG FLASHINFER_BUILD_SUFFIX=cu130
 ENV FLASHINFER_LOCAL_VERSION=${FLASHINFER_BUILD_SUFFIX:-}
 RUN git clone https://github.com/flashinfer-ai/flashinfer.git
 RUN cd flashinfer && \
@@ -134,6 +134,9 @@ RUN cd vllm && \
     git submodule sync && \
     git submodule update --init --recursive -j 8 && \
     sed -i 's/GIT_TAG [a-f0-9]\{40\}/GIT_TAG main/' cmake/external_projects/vllm_flash_attn.cmake && \
+    sed -i '/size_t fail_idx = 0;/d' csrc/cache_kernels.cu && \
+    sed -i 's/, \&fail_idx,/,/' csrc/cache_kernels.cu && \
+    sed -i 's/"cuMemcpyBatchAsync failed at index ",\s*fail_idx, " with error "/"cuMemcpyBatchAsync failed with error "/' csrc/cache_kernels.cu && \
     export MAX_JOBS=8 && \
     export CMAKE_BUILD_PARALLEL_LEVEL=$MAX_JOBS && \
     python use_existing_torch.py && \