[Feature] Add Azure Blob Storage support for RunAI Model Streamer (#34614)

Signed-off-by: hasethuraman <hsethuraman@microsoft.com>
This commit is contained in:
Hari
2026-03-15 17:08:21 +05:30
committed by GitHub
parent 143e4dccdf
commit a3e2e250f0
14 changed files with 75 additions and 17 deletions

View File

@@ -620,7 +620,7 @@ RUN set -eux; \
ARG BITSANDBYTES_VERSION_X86=0.46.1 ARG BITSANDBYTES_VERSION_X86=0.46.1
ARG BITSANDBYTES_VERSION_ARM64=0.42.0 ARG BITSANDBYTES_VERSION_ARM64=0.42.0
ARG TIMM_VERSION=">=1.0.17" ARG TIMM_VERSION=">=1.0.17"
ARG RUNAI_MODEL_STREAMER_VERSION=">=0.15.3" ARG RUNAI_MODEL_STREAMER_VERSION=">=0.15.7"
RUN --mount=type=cache,target=/root/.cache/uv \ RUN --mount=type=cache,target=/root/.cache/uv \
if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \ if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_ARM64}"; \ BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_ARM64}"; \
@@ -628,7 +628,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_X86}"; \ BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_X86}"; \
fi; \ fi; \
uv pip install --system accelerate hf_transfer modelscope \ uv pip install --system accelerate hf_transfer modelscope \
"bitsandbytes>=${BITSANDBYTES_VERSION}" "timm${TIMM_VERSION}" "runai-model-streamer[s3,gcs]${RUNAI_MODEL_STREAMER_VERSION}" "bitsandbytes>=${BITSANDBYTES_VERSION}" "timm${TIMM_VERSION}" "runai-model-streamer[s3,gcs,azure]${RUNAI_MODEL_STREAMER_VERSION}"
# ============================================================ # ============================================================
# VLLM INSTALLATION (depends on build stage) # VLLM INSTALLATION (depends on build stage)

View File

@@ -83,7 +83,7 @@
"default": ">=1.0.17" "default": ">=1.0.17"
}, },
"RUNAI_MODEL_STREAMER_VERSION": { "RUNAI_MODEL_STREAMER_VERSION": {
"default": ">=0.15.3" "default": ">=0.15.7"
} }
} }
} }

View File

@@ -31,6 +31,16 @@ vllm serve gs://core-llm/Llama-3-8b \
--load-format runai_streamer --load-format runai_streamer
``` ```
To run a model from Azure Blob Storage, run:
```bash
AZURE_STORAGE_ACCOUNT_NAME=<account> \
vllm serve az://<container>/<model-path> \
--load-format runai_streamer
```
Authentication uses `DefaultAzureCredential`, which supports `az login`, managed identity, environment variables (`AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, `AZURE_CLIENT_SECRET`), and other credential sources in its default chain.
To run model from a S3 compatible object store run: To run model from a S3 compatible object store run:
```bash ```bash

View File

@@ -42,7 +42,7 @@ tritonclient>=2.51.0
numba == 0.61.2 # Required for N-gram speculative decoding numba == 0.61.2 # Required for N-gram speculative decoding
numpy numpy
runai-model-streamer[s3,gcs]==0.15.3 runai-model-streamer[s3,gcs,azure]==0.15.7
fastsafetensors>=0.2.2 fastsafetensors>=0.2.2
instanttensor>=0.1.5 instanttensor>=0.1.5
pydantic>=2.12 # 2.11 leads to error on python 3.13 pydantic>=2.12 # 2.11 leads to error on python 3.13

View File

@@ -15,7 +15,7 @@ tensorizer==2.10.1
packaging>=24.2 packaging>=24.2
setuptools>=77.0.3,<80.0.0 setuptools>=77.0.3,<80.0.0
setuptools-scm>=8 setuptools-scm>=8
runai-model-streamer[s3,gcs]==0.15.3 runai-model-streamer[s3,gcs,azure]==0.15.7
conch-triton-kernels==1.2.1 conch-triton-kernels==1.2.1
timm>=1.0.17 timm>=1.0.17
# amd-quark: required for Quark quantization on ROCm # amd-quark: required for Quark quantization on ROCm

View File

@@ -56,7 +56,7 @@ grpcio-reflection==1.78.0
arctic-inference == 0.1.1 # Required for suffix decoding test arctic-inference == 0.1.1 # Required for suffix decoding test
numba == 0.61.2 # Required for N-gram speculative decoding numba == 0.61.2 # Required for N-gram speculative decoding
numpy numpy
runai-model-streamer[s3,gcs]==0.15.3 runai-model-streamer[s3,gcs,azure]==0.15.7
fastsafetensors>=0.2.2 # 0.2.2 contains important fixes for multi-GPU mem usage fastsafetensors>=0.2.2 # 0.2.2 contains important fixes for multi-GPU mem usage
instanttensor>=0.1.5 instanttensor>=0.1.5
pydantic>=2.12 # 2.11 leads to error on python 3.13 pydantic>=2.12 # 2.11 leads to error on python 3.13

View File

@@ -64,6 +64,14 @@ audioread==3.0.1
# via librosa # via librosa
av==16.1.0 av==16.1.0
# via -r requirements/test.in # via -r requirements/test.in
azure-core==1.38.2
# via
# azure-identity
# azure-storage-blob
azure-identity==1.25.2
# via runai-model-streamer-azure
azure-storage-blob==12.28.0
# via runai-model-streamer-azure
backoff==2.2.1 backoff==2.2.1
# via # via
# -r requirements/test.in # -r requirements/test.in
@@ -103,8 +111,10 @@ certifi==2024.8.30
# rasterio # rasterio
# requests # requests
# sentry-sdk # sentry-sdk
cffi==1.17.1 cffi==2.0.0
# via soundfile # via
# cryptography
# soundfile
chardet==5.2.0 chardet==5.2.0
# via mbstrdecoder # via mbstrdecoder
charset-normalizer==3.4.0 charset-normalizer==3.4.0
@@ -148,6 +158,12 @@ coverage==7.10.6
# via pytest-cov # via pytest-cov
cramjam==2.9.0 cramjam==2.9.0
# via fastparquet # via fastparquet
cryptography==46.0.5
# via
# azure-identity
# azure-storage-blob
# msal
# pyjwt
cuda-bindings==12.9.4 cuda-bindings==12.9.4
# via torch # via torch
cuda-pathfinder==1.3.3 cuda-pathfinder==1.3.3
@@ -379,6 +395,8 @@ iniconfig==2.0.0
# via pytest # via pytest
instanttensor==0.1.5 instanttensor==0.1.5
# via -r requirements/test.in # via -r requirements/test.in
isodate==0.7.2
# via azure-storage-blob
isoduration==20.11.0 isoduration==20.11.0
# via jsonschema # via jsonschema
isort==5.13.2 isort==5.13.2
@@ -492,6 +510,12 @@ more-itertools==10.5.0
# via lm-eval # via lm-eval
mpmath==1.3.0 mpmath==1.3.0
# via sympy # via sympy
msal==1.34.0
# via
# azure-identity
# msal-extensions
msal-extensions==1.3.1
# via azure-identity
msgpack==1.1.0 msgpack==1.1.0
# via # via
# librosa # librosa
@@ -828,6 +852,8 @@ pydantic-extra-types==2.10.5
# via mistral-common # via mistral-common
pygments==2.18.0 pygments==2.18.0
# via rich # via rich
pyjwt==2.11.0
# via msal
pyogrio==0.11.0 pyogrio==0.11.0
# via geopandas # via geopandas
pyparsing==3.2.0 pyparsing==3.2.0
@@ -945,6 +971,7 @@ regex==2024.9.11
# transformers # transformers
requests==2.32.3 requests==2.32.3
# via # via
# azure-core
# buildkite-test-collector # buildkite-test-collector
# datasets # datasets
# diffusers # diffusers
@@ -957,6 +984,7 @@ requests==2.32.3
# lightly # lightly
# lm-eval # lm-eval
# mistral-common # mistral-common
# msal
# mteb # mteb
# pooch # pooch
# ray # ray
@@ -993,11 +1021,13 @@ rsa==4.9.1
# via google-auth # via google-auth
rtree==1.4.0 rtree==1.4.0
# via torchgeo # via torchgeo
runai-model-streamer==0.15.3 runai-model-streamer==0.15.7
# via -r requirements/test.in # via -r requirements/test.in
runai-model-streamer-gcs==0.15.3 runai-model-streamer-azure==0.15.7
# via runai-model-streamer # via runai-model-streamer
runai-model-streamer-s3==0.15.3 runai-model-streamer-gcs==0.15.7
# via runai-model-streamer
runai-model-streamer-s3==0.15.7
# via runai-model-streamer # via runai-model-streamer
s3transfer==0.10.3 s3transfer==0.10.3
# via boto3 # via boto3
@@ -1266,6 +1296,9 @@ typing-extensions==4.15.0
# aiosignal # aiosignal
# albumentations # albumentations
# alembic # alembic
# azure-core
# azure-identity
# azure-storage-blob
# chz # chz
# fastapi # fastapi
# grpcio # grpcio

View File

@@ -970,7 +970,7 @@ setup(
"tensorizer": ["tensorizer==2.10.1"], "tensorizer": ["tensorizer==2.10.1"],
"fastsafetensors": ["fastsafetensors >= 0.2.2"], "fastsafetensors": ["fastsafetensors >= 0.2.2"],
"instanttensor": ["instanttensor >= 0.1.5"], "instanttensor": ["instanttensor >= 0.1.5"],
"runai": ["runai-model-streamer[s3,gcs] >= 0.15.3"], "runai": ["runai-model-streamer[s3,gcs,azure] >= 0.15.7"],
"audio": [ "audio": [
"librosa", "librosa",
"scipy", "scipy",

View File

@@ -19,6 +19,7 @@ from vllm.transformers_utils.runai_utils import (
def test_is_runai_obj_uri(): def test_is_runai_obj_uri():
assert is_runai_obj_uri("gs://some-gcs-bucket/path") assert is_runai_obj_uri("gs://some-gcs-bucket/path")
assert is_runai_obj_uri("s3://some-s3-bucket/path") assert is_runai_obj_uri("s3://some-s3-bucket/path")
assert is_runai_obj_uri("az://some-azure-container/path")
assert not is_runai_obj_uri("nfs://some-nfs-path") assert not is_runai_obj_uri("nfs://some-nfs-path")

View File

@@ -11,6 +11,7 @@ from vllm.transformers_utils.gguf_utils import (
split_remote_gguf, split_remote_gguf,
) )
from vllm.transformers_utils.utils import ( from vllm.transformers_utils.utils import (
is_azure,
is_cloud_storage, is_cloud_storage,
is_gcs, is_gcs,
is_s3, is_s3,
@@ -31,9 +32,17 @@ def test_is_s3():
assert not is_s3("nfs://nfs-fqdn.local") assert not is_s3("nfs://nfs-fqdn.local")
def test_is_azure():
assert is_azure("az://model-container/path")
assert not is_azure("s3://model-path/path-to-model")
assert not is_azure("/unix/local/path")
assert not is_azure("nfs://nfs-fqdn.local")
def test_is_cloud_storage(): def test_is_cloud_storage():
assert is_cloud_storage("gs://model-path") assert is_cloud_storage("gs://model-path")
assert is_cloud_storage("s3://model-path/path-to-model") assert is_cloud_storage("s3://model-path/path-to-model")
assert is_cloud_storage("az://model-container/path")
assert not is_cloud_storage("/unix/local/path") assert not is_cloud_storage("/unix/local/path")
assert not is_cloud_storage("nfs://nfs-fqdn.local") assert not is_cloud_storage("nfs://nfs-fqdn.local")

View File

@@ -1574,8 +1574,9 @@ class VllmConfig:
"runai_streamer_sharded", "runai_streamer_sharded",
): ):
raise ValueError( raise ValueError(
f"To load a model from S3, 'load_format' " f"To load a model from object storage (S3/GCS/Azure), "
f"must be 'runai_streamer' or 'runai_streamer_sharded', " f"'load_format' must be 'runai_streamer' or "
f"'runai_streamer_sharded', "
f"but got '{self.load_config.load_format}'. " f"but got '{self.load_config.load_format}'. "
f"Model: {self.model_config.model}" f"Model: {self.model_config.model}"
) )

View File

@@ -21,7 +21,7 @@ from vllm.transformers_utils.runai_utils import is_runai_obj_uri, list_safetenso
class RunaiModelStreamerLoader(BaseModelLoader): class RunaiModelStreamerLoader(BaseModelLoader):
""" """
Model loader that can load safetensors Model loader that can load safetensors
files from local FS or S3 bucket. files from local FS, S3, GCS, or Azure Blob Storage.
""" """
def __init__(self, load_config: LoadConfig): def __init__(self, load_config: LoadConfig):

View File

@@ -13,7 +13,7 @@ from vllm.utils.import_utils import PlaceholderModule
logger = init_logger(__name__) logger = init_logger(__name__)
SUPPORTED_SCHEMES = ["s3://", "gs://"] SUPPORTED_SCHEMES = ["s3://", "gs://", "az://"]
try: try:
from runai_model_streamer import list_safetensors as runai_list_safetensors from runai_model_streamer import list_safetensors as runai_list_safetensors

View File

@@ -23,8 +23,12 @@ def is_gcs(model_or_path: str) -> bool:
return model_or_path.lower().startswith("gs://") return model_or_path.lower().startswith("gs://")
def is_azure(model_or_path: str) -> bool:
return model_or_path.lower().startswith("az://")
def is_cloud_storage(model_or_path: str) -> bool: def is_cloud_storage(model_or_path: str) -> bool:
return is_s3(model_or_path) or is_gcs(model_or_path) return is_s3(model_or_path) or is_gcs(model_or_path) or is_azure(model_or_path)
def without_trust_remote_code(kwargs: dict[str, Any]) -> dict[str, Any]: def without_trust_remote_code(kwargs: dict[str, Any]) -> dict[str, Any]: