[Sampler] Adapt to FlashInfer 0.2.3 sampler API (#15777)

Signed-off-by: Bowen Wang <abmfy@icloud.com>
Co-authored-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
Bowen Wang
2025-05-16 15:14:03 -07:00
committed by GitHub
parent aef94c6d07
commit 7fdfa01530
7 changed files with 122 additions and 88 deletions

View File

@@ -255,9 +255,10 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
# Install FlashInfer on every target platform except linux/arm64.
#
# The package is built from source at a single pinned git tag. Exactly one
# install command runs inside the `if`, so its exit status becomes the status
# of the whole RUN step and a failed build fails the image build.
#
# - /etc/environment is sourced first so variables defined there are visible
#   to the install command.
# - The uv cache directory is a BuildKit cache mount: it is reused across
#   builds and is not written into the image layer.
# - FLASHINFER_ENABLE_AOT and TORCH_CUDA_ARCH_LIST are set only for the
#   install command (they are not persisted in the image environment).
#   NOTE(review): FLASHINFER_ENABLE_AOT=1 presumably requests ahead-of-time
#   kernel compilation for the listed CUDA architectures - confirm against the
#   FlashInfer build documentation.
RUN --mount=type=cache,target=/root/.cache/uv \
    . /etc/environment && \
    if [ "$TARGETPLATFORM" != "linux/arm64" ]; then \
        # uv pip install --system https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.4/flashinfer_python-0.2.4+cu124torch2.6-cp38-abi3-linux_x86_64.whl ; \
        # TESTING: install FlashInfer from source to test 2.7.0 final RC
        FLASHINFER_ENABLE_AOT=1 TORCH_CUDA_ARCH_LIST='7.5 8.0 8.6 8.9 9.0+PTX' \
            uv pip install --system --no-build-isolation "git+https://github.com/flashinfer-ai/flashinfer@v0.2.4" ; \
    fi
# Copy the `examples` and `benchmarks` directories from the build context into
# the image. The destinations are relative paths, so they resolve against the
# WORKDIR in effect at this point of the stage.
COPY examples examples
COPY benchmarks benchmarks