[BugFix] Fix plan API Mismatch when using latest FlashInfer (#29426)

Signed-off-by: Andrii Skliar <askliar@askliar-mlt.client.nvidia.com>
Co-authored-by: Andrii Skliar <askliar@askliar-mlt.client.nvidia.com>
This commit is contained in:
Andrii Skliar
2025-11-27 20:34:59 +01:00
committed by GitHub
parent e5a621b724
commit a5345bf49d
3 changed files with 5 additions and 4 deletions

View File

@@ -398,8 +398,8 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
# Install FlashInfer pre-compiled kernel cache and binaries
# https://docs.flashinfer.ai/installation.html
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --system flashinfer-cubin==0.5.2 \
&& uv pip install --system flashinfer-jit-cache==0.5.2 \
uv pip install --system flashinfer-cubin==0.5.3 \
&& uv pip install --system flashinfer-jit-cache==0.5.3 \
--extra-index-url https://flashinfer.ai/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \
&& flashinfer show-config