[Feature] Add Azure Blob Storage support for RunAI Model Streamer (#34614)

Signed-off-by: hasethuraman <hsethuraman@microsoft.com>
This commit is contained in:
Hari
2026-03-15 17:08:21 +05:30
committed by GitHub
parent 143e4dccdf
commit a3e2e250f0
14 changed files with 75 additions and 17 deletions

View File

@@ -620,7 +620,7 @@ RUN set -eux; \
ARG BITSANDBYTES_VERSION_X86=0.46.1
ARG BITSANDBYTES_VERSION_ARM64=0.42.0
ARG TIMM_VERSION=">=1.0.17"
ARG RUNAI_MODEL_STREAMER_VERSION=">=0.15.3"
ARG RUNAI_MODEL_STREAMER_VERSION=">=0.15.7"
RUN --mount=type=cache,target=/root/.cache/uv \
if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_ARM64}"; \
@@ -628,7 +628,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_X86}"; \
fi; \
uv pip install --system accelerate hf_transfer modelscope \
"bitsandbytes>=${BITSANDBYTES_VERSION}" "timm${TIMM_VERSION}" "runai-model-streamer[s3,gcs]${RUNAI_MODEL_STREAMER_VERSION}"
"bitsandbytes>=${BITSANDBYTES_VERSION}" "timm${TIMM_VERSION}" "runai-model-streamer[s3,gcs,azure]${RUNAI_MODEL_STREAMER_VERSION}"
# ============================================================
# VLLM INSTALLATION (depends on build stage)

View File

@@ -83,7 +83,7 @@
"default": ">=1.0.17"
},
"RUNAI_MODEL_STREAMER_VERSION": {
"default": ">=0.15.3"
"default": ">=0.15.7"
}
}
}

View File

@@ -31,6 +31,16 @@ vllm serve gs://core-llm/Llama-3-8b \
--load-format runai_streamer
```
To run a model from Azure Blob Storage, run:
```bash
AZURE_STORAGE_ACCOUNT_NAME=<account> \
vllm serve az://<container>/<model-path> \
--load-format runai_streamer
```
Authentication uses `DefaultAzureCredential`, which supports `az login`, managed identity, environment variables (`AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, `AZURE_CLIENT_SECRET`), and other methods.
To run a model from an S3-compatible object store, run:
```bash

View File

@@ -42,7 +42,7 @@ tritonclient>=2.51.0
numba == 0.61.2 # Required for N-gram speculative decoding
numpy
runai-model-streamer[s3,gcs]==0.15.3
runai-model-streamer[s3,gcs,azure]==0.15.7
fastsafetensors>=0.2.2
instanttensor>=0.1.5
pydantic>=2.12 # 2.11 leads to error on python 3.13

View File

@@ -15,7 +15,7 @@ tensorizer==2.10.1
packaging>=24.2
setuptools>=77.0.3,<80.0.0
setuptools-scm>=8
runai-model-streamer[s3,gcs]==0.15.3
runai-model-streamer[s3,gcs,azure]==0.15.7
conch-triton-kernels==1.2.1
timm>=1.0.17
# amd-quark: required for Quark quantization on ROCm

View File

@@ -56,7 +56,7 @@ grpcio-reflection==1.78.0
arctic-inference == 0.1.1 # Required for suffix decoding test
numba == 0.61.2 # Required for N-gram speculative decoding
numpy
runai-model-streamer[s3,gcs]==0.15.3
runai-model-streamer[s3,gcs,azure]==0.15.7
fastsafetensors>=0.2.2 # 0.2.2 contains important fixes for multi-GPU mem usage
instanttensor>=0.1.5
pydantic>=2.12 # 2.11 leads to error on python 3.13

View File

@@ -64,6 +64,14 @@ audioread==3.0.1
# via librosa
av==16.1.0
# via -r requirements/test.in
azure-core==1.38.2
# via
# azure-identity
# azure-storage-blob
azure-identity==1.25.2
# via runai-model-streamer-azure
azure-storage-blob==12.28.0
# via runai-model-streamer-azure
backoff==2.2.1
# via
# -r requirements/test.in
@@ -103,8 +111,10 @@ certifi==2024.8.30
# rasterio
# requests
# sentry-sdk
cffi==1.17.1
# via soundfile
cffi==2.0.0
# via
# cryptography
# soundfile
chardet==5.2.0
# via mbstrdecoder
charset-normalizer==3.4.0
@@ -148,6 +158,12 @@ coverage==7.10.6
# via pytest-cov
cramjam==2.9.0
# via fastparquet
cryptography==46.0.5
# via
# azure-identity
# azure-storage-blob
# msal
# pyjwt
cuda-bindings==12.9.4
# via torch
cuda-pathfinder==1.3.3
@@ -379,6 +395,8 @@ iniconfig==2.0.0
# via pytest
instanttensor==0.1.5
# via -r requirements/test.in
isodate==0.7.2
# via azure-storage-blob
isoduration==20.11.0
# via jsonschema
isort==5.13.2
@@ -492,6 +510,12 @@ more-itertools==10.5.0
# via lm-eval
mpmath==1.3.0
# via sympy
msal==1.34.0
# via
# azure-identity
# msal-extensions
msal-extensions==1.3.1
# via azure-identity
msgpack==1.1.0
# via
# librosa
@@ -828,6 +852,8 @@ pydantic-extra-types==2.10.5
# via mistral-common
pygments==2.18.0
# via rich
pyjwt==2.11.0
# via msal
pyogrio==0.11.0
# via geopandas
pyparsing==3.2.0
@@ -945,6 +971,7 @@ regex==2024.9.11
# transformers
requests==2.32.3
# via
# azure-core
# buildkite-test-collector
# datasets
# diffusers
@@ -957,6 +984,7 @@ requests==2.32.3
# lightly
# lm-eval
# mistral-common
# msal
# mteb
# pooch
# ray
@@ -993,11 +1021,13 @@ rsa==4.9.1
# via google-auth
rtree==1.4.0
# via torchgeo
runai-model-streamer==0.15.3
runai-model-streamer==0.15.7
# via -r requirements/test.in
runai-model-streamer-gcs==0.15.3
runai-model-streamer-azure==0.15.7
# via runai-model-streamer
runai-model-streamer-s3==0.15.3
runai-model-streamer-gcs==0.15.7
# via runai-model-streamer
runai-model-streamer-s3==0.15.7
# via runai-model-streamer
s3transfer==0.10.3
# via boto3
@@ -1266,6 +1296,9 @@ typing-extensions==4.15.0
# aiosignal
# albumentations
# alembic
# azure-core
# azure-identity
# azure-storage-blob
# chz
# fastapi
# grpcio

View File

@@ -970,7 +970,7 @@ setup(
"tensorizer": ["tensorizer==2.10.1"],
"fastsafetensors": ["fastsafetensors >= 0.2.2"],
"instanttensor": ["instanttensor >= 0.1.5"],
"runai": ["runai-model-streamer[s3,gcs] >= 0.15.3"],
"runai": ["runai-model-streamer[s3,gcs,azure] >= 0.15.7"],
"audio": [
"librosa",
"scipy",

View File

@@ -19,6 +19,7 @@ from vllm.transformers_utils.runai_utils import (
def test_is_runai_obj_uri():
assert is_runai_obj_uri("gs://some-gcs-bucket/path")
assert is_runai_obj_uri("s3://some-s3-bucket/path")
assert is_runai_obj_uri("az://some-azure-container/path")
assert not is_runai_obj_uri("nfs://some-nfs-path")

View File

@@ -11,6 +11,7 @@ from vllm.transformers_utils.gguf_utils import (
split_remote_gguf,
)
from vllm.transformers_utils.utils import (
is_azure,
is_cloud_storage,
is_gcs,
is_s3,
@@ -31,9 +32,17 @@ def test_is_s3():
assert not is_s3("nfs://nfs-fqdn.local")
def test_is_azure():
assert is_azure("az://model-container/path")
assert not is_azure("s3://model-path/path-to-model")
assert not is_azure("/unix/local/path")
assert not is_azure("nfs://nfs-fqdn.local")
def test_is_cloud_storage():
assert is_cloud_storage("gs://model-path")
assert is_cloud_storage("s3://model-path/path-to-model")
assert is_cloud_storage("az://model-container/path")
assert not is_cloud_storage("/unix/local/path")
assert not is_cloud_storage("nfs://nfs-fqdn.local")

View File

@@ -1574,8 +1574,9 @@ class VllmConfig:
"runai_streamer_sharded",
):
raise ValueError(
f"To load a model from S3, 'load_format' "
f"must be 'runai_streamer' or 'runai_streamer_sharded', "
f"To load a model from object storage (S3/GCS/Azure), "
f"'load_format' must be 'runai_streamer' or "
f"'runai_streamer_sharded', "
f"but got '{self.load_config.load_format}'. "
f"Model: {self.model_config.model}"
)

View File

@@ -21,7 +21,7 @@ from vllm.transformers_utils.runai_utils import is_runai_obj_uri, list_safetenso
class RunaiModelStreamerLoader(BaseModelLoader):
"""
Model loader that can load safetensors
files from local FS or S3 bucket.
files from local FS, S3, GCS, or Azure Blob Storage.
"""
def __init__(self, load_config: LoadConfig):

View File

@@ -13,7 +13,7 @@ from vllm.utils.import_utils import PlaceholderModule
logger = init_logger(__name__)
SUPPORTED_SCHEMES = ["s3://", "gs://"]
SUPPORTED_SCHEMES = ["s3://", "gs://", "az://"]
try:
from runai_model_streamer import list_safetensors as runai_list_safetensors

View File

@@ -23,8 +23,12 @@ def is_gcs(model_or_path: str) -> bool:
return model_or_path.lower().startswith("gs://")
def is_azure(model_or_path: str) -> bool:
return model_or_path.lower().startswith("az://")
def is_cloud_storage(model_or_path: str) -> bool:
return is_s3(model_or_path) or is_gcs(model_or_path)
return is_s3(model_or_path) or is_gcs(model_or_path) or is_azure(model_or_path)
def without_trust_remote_code(kwargs: dict[str, Any]) -> dict[str, Any]: