[Docker] Add fastsafetensors to NVIDIA Dockerfile (#38950)

This commit is contained in:
Zhewen Li
2026-04-08 22:21:37 -07:00
committed by GitHub
parent e80e633927
commit 9e78555743
5 changed files with 14 additions and 4 deletions

View File

@@ -538,7 +538,9 @@ RUN CUDA_VERSION_DASH=$(echo $CUDA_VERSION | cut -d. -f1,2 | tr '.' '-') && \
cuda-nvrtc-${CUDA_VERSION_DASH} \
cuda-cuobjdump-${CUDA_VERSION_DASH} \
libcurand-dev-${CUDA_VERSION_DASH} \
libcublas-${CUDA_VERSION_DASH} && \
libcublas-${CUDA_VERSION_DASH} \
# Required by fastsafetensors (fixes #20384)
libnuma-dev && \
# Fixes nccl_allocator requiring nccl.h at runtime
# https://github.com/vllm-project/vllm/blob/1336a1ea244fa8bfd7e72751cabbdb5b68a0c11a/vllm/distributed/device_communicators/pynccl_allocator.py#L22
# NCCL packages don't use the cuda-MAJOR-MINOR naming convention,

View File

@@ -19,7 +19,8 @@ ENV PYTORCH_ROCM_ARCH=${ARG_PYTORCH_ROCM_ARCH:-${PYTORCH_ROCM_ARCH}}
# Install some basic utilities
RUN apt-get update -q -y && apt-get install -q -y \
sqlite3 libsqlite3-dev libfmt-dev libmsgpack-dev libsuitesparse-dev \
apt-transport-https ca-certificates wget curl
apt-transport-https ca-certificates wget curl \
libnuma-dev
RUN python3 -m pip install --upgrade pip
# Remove sccache only if not using sccache (it exists in base image from Dockerfile.rocm_base)
ARG USE_SCCACHE

View File

@@ -15,6 +15,9 @@ flashinfer-cubin==0.6.7
# breaking changes in 1.19.0
nvidia-cudnn-frontend>=1.13.0,<1.19.0
# Required for faster safetensors model loading
fastsafetensors >= 0.2.2
# QuACK and Cutlass DSL for FA4 (cute-DSL implementation)
nvidia-cutlass-dsl>=4.4.2
quack-kernels>=0.3.3

View File

@@ -276,7 +276,9 @@ fastar==0.9.0
fastparquet==2026.3.0
# via genai-perf
fastsafetensors==0.2.2
# via -r requirements/rocm-test.in
# via
# -c requirements/rocm.txt
# -r requirements/rocm-test.in
filelock==3.25.2
# via
# -c requirements/common.txt

View File

@@ -20,4 +20,6 @@ conch-triton-kernels==1.2.1
timm>=1.0.17
# amd-quark: required for Quark quantization on ROCm
# To be consistent with test_quark.py
amd-quark>=0.8.99
amd-quark>=0.8.99
# Required for faster safetensors model loading
fastsafetensors >= 0.2.2