From a3e2e250f09d7a347cfdccfe2f7b593edd1b7bce Mon Sep 17 00:00:00 2001 From: Hari Date: Sun, 15 Mar 2026 17:08:21 +0530 Subject: [PATCH] [Feature] Add Azure Blob Storage support for RunAI Model Streamer (#34614) Signed-off-by: hasethuraman --- docker/Dockerfile | 4 +- docker/versions.json | 2 +- .../models/extensions/runai_model_streamer.md | 10 +++++ requirements/nightly_torch_test.txt | 2 +- requirements/rocm.txt | 2 +- requirements/test.in | 2 +- requirements/test.txt | 43 ++++++++++++++++--- setup.py | 2 +- .../runai_streamer_loader/test_runai_utils.py | 1 + tests/transformers_utils/test_utils.py | 9 ++++ vllm/config/vllm.py | 5 ++- .../model_loader/runai_streamer_loader.py | 2 +- vllm/transformers_utils/runai_utils.py | 2 +- vllm/transformers_utils/utils.py | 6 ++- 14 files changed, 75 insertions(+), 17 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 23fe30704..2abf03515 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -620,7 +620,7 @@ RUN set -eux; \ ARG BITSANDBYTES_VERSION_X86=0.46.1 ARG BITSANDBYTES_VERSION_ARM64=0.42.0 ARG TIMM_VERSION=">=1.0.17" -ARG RUNAI_MODEL_STREAMER_VERSION=">=0.15.3" +ARG RUNAI_MODEL_STREAMER_VERSION=">=0.15.7" RUN --mount=type=cache,target=/root/.cache/uv \ if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \ BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_ARM64}"; \ @@ -628,7 +628,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_X86}"; \ fi; \ uv pip install --system accelerate hf_transfer modelscope \ - "bitsandbytes>=${BITSANDBYTES_VERSION}" "timm${TIMM_VERSION}" "runai-model-streamer[s3,gcs]${RUNAI_MODEL_STREAMER_VERSION}" + "bitsandbytes>=${BITSANDBYTES_VERSION}" "timm${TIMM_VERSION}" "runai-model-streamer[s3,gcs,azure]${RUNAI_MODEL_STREAMER_VERSION}" # ============================================================ # VLLM INSTALLATION (depends on build stage) diff --git a/docker/versions.json b/docker/versions.json index d7c2a06ba..74a974a35 100644 --- 
a/docker/versions.json +++ b/docker/versions.json @@ -83,7 +83,7 @@ "default": ">=1.0.17" }, "RUNAI_MODEL_STREAMER_VERSION": { - "default": ">=0.15.3" + "default": ">=0.15.7" } } } diff --git a/docs/models/extensions/runai_model_streamer.md b/docs/models/extensions/runai_model_streamer.md index fc9d5eec3..38c603b46 100644 --- a/docs/models/extensions/runai_model_streamer.md +++ b/docs/models/extensions/runai_model_streamer.md @@ -31,6 +31,16 @@ vllm serve gs://core-llm/Llama-3-8b \ --load-format runai_streamer ``` +To run model from Azure Blob Storage run: + +```bash +AZURE_STORAGE_ACCOUNT_NAME=<storage-account-name> \ +vllm serve az://<container-name>/<path-to-model> \ + --load-format runai_streamer +``` + +Authentication uses `DefaultAzureCredential`, which supports `az login`, managed identity, environment variables (`AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, `AZURE_CLIENT_SECRET`), and other methods. + To run model from a S3 compatible object store run: ```bash diff --git a/requirements/nightly_torch_test.txt b/requirements/nightly_torch_test.txt index 4d2bf8d2b..ca9c5bd1c 100644 --- a/requirements/nightly_torch_test.txt +++ b/requirements/nightly_torch_test.txt @@ -42,7 +42,7 @@ tritonclient>=2.51.0 numba == 0.61.2 # Required for N-gram speculative decoding numpy -runai-model-streamer[s3,gcs]==0.15.3 +runai-model-streamer[s3,gcs,azure]==0.15.7 fastsafetensors>=0.2.2 instanttensor>=0.1.5 pydantic>=2.12 # 2.11 leads to error on python 3.13 diff --git a/requirements/rocm.txt b/requirements/rocm.txt index d70083338..6639e71a4 100644 --- a/requirements/rocm.txt +++ b/requirements/rocm.txt @@ -15,7 +15,7 @@ tensorizer==2.10.1 packaging>=24.2 setuptools>=77.0.3,<80.0.0 setuptools-scm>=8 -runai-model-streamer[s3,gcs]==0.15.3 +runai-model-streamer[s3,gcs,azure]==0.15.7 conch-triton-kernels==1.2.1 timm>=1.0.17 # amd-quark: required for Quark quantization on ROCm diff --git a/requirements/test.in b/requirements/test.in index 3d742a603..8bd005144 100644 --- a/requirements/test.in +++ b/requirements/test.in @@ -56,7 +56,7 @@ 
grpcio-reflection==1.78.0 arctic-inference == 0.1.1 # Required for suffix decoding test numba == 0.61.2 # Required for N-gram speculative decoding numpy -runai-model-streamer[s3,gcs]==0.15.3 +runai-model-streamer[s3,gcs,azure]==0.15.7 fastsafetensors>=0.2.2 # 0.2.2 contains important fixes for multi-GPU mem usage instanttensor>=0.1.5 pydantic>=2.12 # 2.11 leads to error on python 3.13 diff --git a/requirements/test.txt b/requirements/test.txt index a3340aeaa..e2f9040be 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -64,6 +64,14 @@ audioread==3.0.1 # via librosa av==16.1.0 # via -r requirements/test.in +azure-core==1.38.2 + # via + # azure-identity + # azure-storage-blob +azure-identity==1.25.2 + # via runai-model-streamer-azure +azure-storage-blob==12.28.0 + # via runai-model-streamer-azure backoff==2.2.1 # via # -r requirements/test.in @@ -103,8 +111,10 @@ certifi==2024.8.30 # rasterio # requests # sentry-sdk -cffi==1.17.1 - # via soundfile +cffi==2.0.0 + # via + # cryptography + # soundfile chardet==5.2.0 # via mbstrdecoder charset-normalizer==3.4.0 @@ -148,6 +158,12 @@ coverage==7.10.6 # via pytest-cov cramjam==2.9.0 # via fastparquet +cryptography==46.0.5 + # via + # azure-identity + # azure-storage-blob + # msal + # pyjwt cuda-bindings==12.9.4 # via torch cuda-pathfinder==1.3.3 @@ -379,6 +395,8 @@ iniconfig==2.0.0 # via pytest instanttensor==0.1.5 # via -r requirements/test.in +isodate==0.7.2 + # via azure-storage-blob isoduration==20.11.0 # via jsonschema isort==5.13.2 @@ -492,6 +510,12 @@ more-itertools==10.5.0 # via lm-eval mpmath==1.3.0 # via sympy +msal==1.34.0 + # via + # azure-identity + # msal-extensions +msal-extensions==1.3.1 + # via azure-identity msgpack==1.1.0 # via # librosa @@ -828,6 +852,8 @@ pydantic-extra-types==2.10.5 # via mistral-common pygments==2.18.0 # via rich +pyjwt==2.11.0 + # via msal pyogrio==0.11.0 # via geopandas pyparsing==3.2.0 @@ -945,6 +971,7 @@ regex==2024.9.11 # transformers requests==2.32.3 # via + # 
azure-core # buildkite-test-collector # datasets # diffusers @@ -957,6 +984,7 @@ requests==2.32.3 # lightly # lm-eval # mistral-common + # msal # mteb # pooch # ray @@ -993,11 +1021,13 @@ rsa==4.9.1 # via google-auth rtree==1.4.0 # via torchgeo -runai-model-streamer==0.15.3 +runai-model-streamer==0.15.7 # via -r requirements/test.in -runai-model-streamer-gcs==0.15.3 +runai-model-streamer-azure==0.15.7 # via runai-model-streamer -runai-model-streamer-s3==0.15.3 +runai-model-streamer-gcs==0.15.7 + # via runai-model-streamer +runai-model-streamer-s3==0.15.7 # via runai-model-streamer s3transfer==0.10.3 # via boto3 @@ -1266,6 +1296,9 @@ typing-extensions==4.15.0 # aiosignal # albumentations # alembic + # azure-core + # azure-identity + # azure-storage-blob # chz # fastapi # grpcio diff --git a/setup.py b/setup.py index bcd353b14..829552fba 100644 --- a/setup.py +++ b/setup.py @@ -970,7 +970,7 @@ setup( "tensorizer": ["tensorizer==2.10.1"], "fastsafetensors": ["fastsafetensors >= 0.2.2"], "instanttensor": ["instanttensor >= 0.1.5"], - "runai": ["runai-model-streamer[s3,gcs] >= 0.15.3"], + "runai": ["runai-model-streamer[s3,gcs,azure] >= 0.15.7"], "audio": [ "librosa", "scipy", diff --git a/tests/model_executor/model_loader/runai_streamer_loader/test_runai_utils.py b/tests/model_executor/model_loader/runai_streamer_loader/test_runai_utils.py index 3ad7308ee..ad852f695 100644 --- a/tests/model_executor/model_loader/runai_streamer_loader/test_runai_utils.py +++ b/tests/model_executor/model_loader/runai_streamer_loader/test_runai_utils.py @@ -19,6 +19,7 @@ from vllm.transformers_utils.runai_utils import ( def test_is_runai_obj_uri(): assert is_runai_obj_uri("gs://some-gcs-bucket/path") assert is_runai_obj_uri("s3://some-s3-bucket/path") + assert is_runai_obj_uri("az://some-azure-container/path") assert not is_runai_obj_uri("nfs://some-nfs-path") diff --git a/tests/transformers_utils/test_utils.py b/tests/transformers_utils/test_utils.py index cf83970b4..485c2efff 100644 --- 
a/tests/transformers_utils/test_utils.py +++ b/tests/transformers_utils/test_utils.py @@ -11,6 +11,7 @@ from vllm.transformers_utils.gguf_utils import ( split_remote_gguf, ) from vllm.transformers_utils.utils import ( + is_azure, is_cloud_storage, is_gcs, is_s3, @@ -31,9 +32,17 @@ def test_is_s3(): assert not is_s3("nfs://nfs-fqdn.local") +def test_is_azure(): + assert is_azure("az://model-container/path") + assert not is_azure("s3://model-path/path-to-model") + assert not is_azure("/unix/local/path") + assert not is_azure("nfs://nfs-fqdn.local") + + def test_is_cloud_storage(): assert is_cloud_storage("gs://model-path") assert is_cloud_storage("s3://model-path/path-to-model") + assert is_cloud_storage("az://model-container/path") assert not is_cloud_storage("/unix/local/path") assert not is_cloud_storage("nfs://nfs-fqdn.local") diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index dc776fac1..8cd114481 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -1574,8 +1574,9 @@ class VllmConfig: "runai_streamer_sharded", ): raise ValueError( - f"To load a model from S3, 'load_format' " - f"must be 'runai_streamer' or 'runai_streamer_sharded', " + f"To load a model from object storage (S3/GCS/Azure), " + f"'load_format' must be 'runai_streamer' or " + f"'runai_streamer_sharded', " f"but got '{self.load_config.load_format}'. " f"Model: {self.model_config.model}" ) diff --git a/vllm/model_executor/model_loader/runai_streamer_loader.py b/vllm/model_executor/model_loader/runai_streamer_loader.py index 9d3ade4cd..782514210 100644 --- a/vllm/model_executor/model_loader/runai_streamer_loader.py +++ b/vllm/model_executor/model_loader/runai_streamer_loader.py @@ -21,7 +21,7 @@ from vllm.transformers_utils.runai_utils import is_runai_obj_uri, list_safetenso class RunaiModelStreamerLoader(BaseModelLoader): """ Model loader that can load safetensors - files from local FS or S3 bucket. + files from local FS, S3, GCS, or Azure Blob Storage. 
""" def __init__(self, load_config: LoadConfig): diff --git a/vllm/transformers_utils/runai_utils.py b/vllm/transformers_utils/runai_utils.py index 7e6af2602..248ede6a6 100644 --- a/vllm/transformers_utils/runai_utils.py +++ b/vllm/transformers_utils/runai_utils.py @@ -13,7 +13,7 @@ from vllm.utils.import_utils import PlaceholderModule logger = init_logger(__name__) -SUPPORTED_SCHEMES = ["s3://", "gs://"] +SUPPORTED_SCHEMES = ["s3://", "gs://", "az://"] try: from runai_model_streamer import list_safetensors as runai_list_safetensors diff --git a/vllm/transformers_utils/utils.py b/vllm/transformers_utils/utils.py index 47cebe208..04def3e37 100644 --- a/vllm/transformers_utils/utils.py +++ b/vllm/transformers_utils/utils.py @@ -23,8 +23,12 @@ def is_gcs(model_or_path: str) -> bool: return model_or_path.lower().startswith("gs://") +def is_azure(model_or_path: str) -> bool: + return model_or_path.lower().startswith("az://") + + def is_cloud_storage(model_or_path: str) -> bool: - return is_s3(model_or_path) or is_gcs(model_or_path) + return is_s3(model_or_path) or is_gcs(model_or_path) or is_azure(model_or_path) def without_trust_remote_code(kwargs: dict[str, Any]) -> dict[str, Any]: