[Docker] Add fastsafetensors to NVIDIA Dockerfile (#38950)
This commit is contained in:
@@ -538,7 +538,9 @@ RUN CUDA_VERSION_DASH=$(echo $CUDA_VERSION | cut -d. -f1,2 | tr '.' '-') && \
|
||||
cuda-nvrtc-${CUDA_VERSION_DASH} \
|
||||
cuda-cuobjdump-${CUDA_VERSION_DASH} \
|
||||
libcurand-dev-${CUDA_VERSION_DASH} \
|
||||
- libcublas-${CUDA_VERSION_DASH} && \
|
||||
+ libcublas-${CUDA_VERSION_DASH} \
|
||||
+ # Required by fastsafetensors (fixes #20384)
|
||||
+ libnuma-dev && \
|
||||
# Fixes nccl_allocator requiring nccl.h at runtime
|
||||
# https://github.com/vllm-project/vllm/blob/1336a1ea244fa8bfd7e72751cabbdb5b68a0c11a/vllm/distributed/device_communicators/pynccl_allocator.py#L22
|
||||
# NCCL packages don't use the cuda-MAJOR-MINOR naming convention,
|
||||
|
||||
@@ -19,7 +19,8 @@ ENV PYTORCH_ROCM_ARCH=${ARG_PYTORCH_ROCM_ARCH:-${PYTORCH_ROCM_ARCH}}
|
||||
# Install some basic utilities
|
||||
RUN apt-get update -q -y && apt-get install -q -y \
|
||||
sqlite3 libsqlite3-dev libfmt-dev libmsgpack-dev libsuitesparse-dev \
|
||||
- apt-transport-https ca-certificates wget curl
|
||||
+ apt-transport-https ca-certificates wget curl \
|
||||
+ libnuma-dev
|
||||
RUN python3 -m pip install --upgrade pip
|
||||
# Remove sccache only if not using sccache (it exists in base image from Dockerfile.rocm_base)
|
||||
ARG USE_SCCACHE
|
||||
|
||||
@@ -15,6 +15,9 @@ flashinfer-cubin==0.6.7
|
||||
# breaking changes in 1.19.0
|
||||
nvidia-cudnn-frontend>=1.13.0,<1.19.0
|
||||
|
||||
+ # Required for faster safetensors model loading
|
||||
+ fastsafetensors >= 0.2.2
|
||||
|
||||
# QuACK and Cutlass DSL for FA4 (cute-DSL implementation)
|
||||
nvidia-cutlass-dsl>=4.4.2
|
||||
quack-kernels>=0.3.3
|
||||
|
||||
@@ -276,7 +276,9 @@ fastar==0.9.0
|
||||
fastparquet==2026.3.0
|
||||
# via genai-perf
|
||||
fastsafetensors==0.2.2
|
||||
- # via -r requirements/rocm-test.in
|
||||
+ # via
|
||||
+ # -c requirements/rocm.txt
|
||||
+ # -r requirements/rocm-test.in
|
||||
filelock==3.25.2
|
||||
# via
|
||||
# -c requirements/common.txt
|
||||
|
||||
@@ -20,4 +20,6 @@ conch-triton-kernels==1.2.1
|
||||
timm>=1.0.17
|
||||
# amd-quark: required for Quark quantization on ROCm
|
||||
# To be consistent with test_quark.py
|
||||
- amd-quark>=0.8.99
|
||||
+ amd-quark>=0.8.99
|
||||
+ # Required for faster safetensors model loading
|
||||
+ fastsafetensors >= 0.2.2
|
||||
Reference in New Issue
Block a user