[Docker] Add fastsafetensors to NVIDIA Dockerfile (#38950)
This commit is contained in:
@@ -538,7 +538,9 @@ RUN CUDA_VERSION_DASH=$(echo $CUDA_VERSION | cut -d. -f1,2 | tr '.' '-') && \
|
|||||||
cuda-nvrtc-${CUDA_VERSION_DASH} \
|
cuda-nvrtc-${CUDA_VERSION_DASH} \
|
||||||
cuda-cuobjdump-${CUDA_VERSION_DASH} \
|
cuda-cuobjdump-${CUDA_VERSION_DASH} \
|
||||||
libcurand-dev-${CUDA_VERSION_DASH} \
|
libcurand-dev-${CUDA_VERSION_DASH} \
|
||||||
libcublas-${CUDA_VERSION_DASH} && \
|
libcublas-${CUDA_VERSION_DASH} \
|
||||||
|
# Required by fastsafetensors (fixes #20384)
|
||||||
|
libnuma-dev && \
|
||||||
# Fixes nccl_allocator requiring nccl.h at runtime
|
# Fixes nccl_allocator requiring nccl.h at runtime
|
||||||
# https://github.com/vllm-project/vllm/blob/1336a1ea244fa8bfd7e72751cabbdb5b68a0c11a/vllm/distributed/device_communicators/pynccl_allocator.py#L22
|
# https://github.com/vllm-project/vllm/blob/1336a1ea244fa8bfd7e72751cabbdb5b68a0c11a/vllm/distributed/device_communicators/pynccl_allocator.py#L22
|
||||||
# NCCL packages don't use the cuda-MAJOR-MINOR naming convention,
|
# NCCL packages don't use the cuda-MAJOR-MINOR naming convention,
|
||||||
|
|||||||
@@ -19,7 +19,8 @@ ENV PYTORCH_ROCM_ARCH=${ARG_PYTORCH_ROCM_ARCH:-${PYTORCH_ROCM_ARCH}}
|
|||||||
# Install some basic utilities
|
# Install some basic utilities
|
||||||
RUN apt-get update -q -y && apt-get install -q -y \
|
RUN apt-get update -q -y && apt-get install -q -y \
|
||||||
sqlite3 libsqlite3-dev libfmt-dev libmsgpack-dev libsuitesparse-dev \
|
sqlite3 libsqlite3-dev libfmt-dev libmsgpack-dev libsuitesparse-dev \
|
||||||
apt-transport-https ca-certificates wget curl
|
apt-transport-https ca-certificates wget curl \
|
||||||
|
libnuma-dev
|
||||||
RUN python3 -m pip install --upgrade pip
|
RUN python3 -m pip install --upgrade pip
|
||||||
# Remove sccache only if not using sccache (it exists in base image from Dockerfile.rocm_base)
|
# Remove sccache only if not using sccache (it exists in base image from Dockerfile.rocm_base)
|
||||||
ARG USE_SCCACHE
|
ARG USE_SCCACHE
|
||||||
|
|||||||
@@ -15,6 +15,9 @@ flashinfer-cubin==0.6.7
|
|||||||
# breaking changes in 1.19.0
|
# breaking changes in 1.19.0
|
||||||
nvidia-cudnn-frontend>=1.13.0,<1.19.0
|
nvidia-cudnn-frontend>=1.13.0,<1.19.0
|
||||||
|
|
||||||
|
# Required for faster safetensors model loading
|
||||||
|
fastsafetensors >= 0.2.2
|
||||||
|
|
||||||
# QuACK and Cutlass DSL for FA4 (cute-DSL implementation)
|
# QuACK and Cutlass DSL for FA4 (cute-DSL implementation)
|
||||||
nvidia-cutlass-dsl>=4.4.2
|
nvidia-cutlass-dsl>=4.4.2
|
||||||
quack-kernels>=0.3.3
|
quack-kernels>=0.3.3
|
||||||
|
|||||||
@@ -276,7 +276,9 @@ fastar==0.9.0
|
|||||||
fastparquet==2026.3.0
|
fastparquet==2026.3.0
|
||||||
# via genai-perf
|
# via genai-perf
|
||||||
fastsafetensors==0.2.2
|
fastsafetensors==0.2.2
|
||||||
# via -r requirements/rocm-test.in
|
# via
|
||||||
|
# -c requirements/rocm.txt
|
||||||
|
# -r requirements/rocm-test.in
|
||||||
filelock==3.25.2
|
filelock==3.25.2
|
||||||
# via
|
# via
|
||||||
# -c requirements/common.txt
|
# -c requirements/common.txt
|
||||||
|
|||||||
@@ -20,4 +20,6 @@ conch-triton-kernels==1.2.1
|
|||||||
timm>=1.0.17
|
timm>=1.0.17
|
||||||
# amd-quark: required for Quark quantization on ROCm
|
# amd-quark: required for Quark quantization on ROCm
|
||||||
# To be consistent with test_quark.py
|
# To be consistent with test_quark.py
|
||||||
amd-quark>=0.8.99
|
amd-quark>=0.8.99
|
||||||
|
# Required for faster safetensors model loading
|
||||||
|
fastsafetensors >= 0.2.2
|
||||||
Reference in New Issue
Block a user