[Feature] Add Azure Blob Storage support for RunAI Model Streamer (#34614)
Signed-off-by: hasethuraman <hsethuraman@microsoft.com>
This commit is contained in:
@@ -620,7 +620,7 @@ RUN set -eux; \
|
||||
ARG BITSANDBYTES_VERSION_X86=0.46.1
|
||||
ARG BITSANDBYTES_VERSION_ARM64=0.42.0
|
||||
ARG TIMM_VERSION=">=1.0.17"
|
||||
ARG RUNAI_MODEL_STREAMER_VERSION=">=0.15.3"
|
||||
ARG RUNAI_MODEL_STREAMER_VERSION=">=0.15.7"
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
|
||||
BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_ARM64}"; \
|
||||
@@ -628,7 +628,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_X86}"; \
|
||||
fi; \
|
||||
uv pip install --system accelerate hf_transfer modelscope \
|
||||
"bitsandbytes>=${BITSANDBYTES_VERSION}" "timm${TIMM_VERSION}" "runai-model-streamer[s3,gcs]${RUNAI_MODEL_STREAMER_VERSION}"
|
||||
"bitsandbytes>=${BITSANDBYTES_VERSION}" "timm${TIMM_VERSION}" "runai-model-streamer[s3,gcs,azure]${RUNAI_MODEL_STREAMER_VERSION}"
|
||||
|
||||
# ============================================================
|
||||
# VLLM INSTALLATION (depends on build stage)
|
||||
|
||||
@@ -83,7 +83,7 @@
|
||||
"default": ">=1.0.17"
|
||||
},
|
||||
"RUNAI_MODEL_STREAMER_VERSION": {
|
||||
"default": ">=0.15.3"
|
||||
"default": ">=0.15.7"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -31,6 +31,16 @@ vllm serve gs://core-llm/Llama-3-8b \
|
||||
--load-format runai_streamer
|
||||
```
|
||||
|
||||
To run a model from Azure Blob Storage, run:
|
||||
|
||||
```bash
|
||||
AZURE_STORAGE_ACCOUNT_NAME=<account> \
|
||||
vllm serve az://<container>/<model-path> \
|
||||
--load-format runai_streamer
|
||||
```
|
||||
|
||||
Authentication uses `DefaultAzureCredential`, which supports `az login`, managed identity, environment variables (`AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, `AZURE_CLIENT_SECRET`), and other methods.
|
||||
|
||||
To run a model from an S3-compatible object store, run:
|
||||
|
||||
```bash
|
||||
|
||||
@@ -42,7 +42,7 @@ tritonclient>=2.51.0
|
||||
|
||||
numba == 0.61.2 # Required for N-gram speculative decoding
|
||||
numpy
|
||||
runai-model-streamer[s3,gcs]==0.15.3
|
||||
runai-model-streamer[s3,gcs,azure]==0.15.7
|
||||
fastsafetensors>=0.2.2
|
||||
instanttensor>=0.1.5
|
||||
pydantic>=2.12 # 2.11 leads to error on python 3.13
|
||||
|
||||
@@ -15,7 +15,7 @@ tensorizer==2.10.1
|
||||
packaging>=24.2
|
||||
setuptools>=77.0.3,<80.0.0
|
||||
setuptools-scm>=8
|
||||
runai-model-streamer[s3,gcs]==0.15.3
|
||||
runai-model-streamer[s3,gcs,azure]==0.15.7
|
||||
conch-triton-kernels==1.2.1
|
||||
timm>=1.0.17
|
||||
# amd-quark: required for Quark quantization on ROCm
|
||||
|
||||
@@ -56,7 +56,7 @@ grpcio-reflection==1.78.0
|
||||
arctic-inference == 0.1.1 # Required for suffix decoding test
|
||||
numba == 0.61.2 # Required for N-gram speculative decoding
|
||||
numpy
|
||||
runai-model-streamer[s3,gcs]==0.15.3
|
||||
runai-model-streamer[s3,gcs,azure]==0.15.7
|
||||
fastsafetensors>=0.2.2 # 0.2.2 contains important fixes for multi-GPU mem usage
|
||||
instanttensor>=0.1.5
|
||||
pydantic>=2.12 # 2.11 leads to error on python 3.13
|
||||
|
||||
@@ -64,6 +64,14 @@ audioread==3.0.1
|
||||
# via librosa
|
||||
av==16.1.0
|
||||
# via -r requirements/test.in
|
||||
azure-core==1.38.2
|
||||
# via
|
||||
# azure-identity
|
||||
# azure-storage-blob
|
||||
azure-identity==1.25.2
|
||||
# via runai-model-streamer-azure
|
||||
azure-storage-blob==12.28.0
|
||||
# via runai-model-streamer-azure
|
||||
backoff==2.2.1
|
||||
# via
|
||||
# -r requirements/test.in
|
||||
@@ -103,8 +111,10 @@ certifi==2024.8.30
|
||||
# rasterio
|
||||
# requests
|
||||
# sentry-sdk
|
||||
cffi==1.17.1
|
||||
# via soundfile
|
||||
cffi==2.0.0
|
||||
# via
|
||||
# cryptography
|
||||
# soundfile
|
||||
chardet==5.2.0
|
||||
# via mbstrdecoder
|
||||
charset-normalizer==3.4.0
|
||||
@@ -148,6 +158,12 @@ coverage==7.10.6
|
||||
# via pytest-cov
|
||||
cramjam==2.9.0
|
||||
# via fastparquet
|
||||
cryptography==46.0.5
|
||||
# via
|
||||
# azure-identity
|
||||
# azure-storage-blob
|
||||
# msal
|
||||
# pyjwt
|
||||
cuda-bindings==12.9.4
|
||||
# via torch
|
||||
cuda-pathfinder==1.3.3
|
||||
@@ -379,6 +395,8 @@ iniconfig==2.0.0
|
||||
# via pytest
|
||||
instanttensor==0.1.5
|
||||
# via -r requirements/test.in
|
||||
isodate==0.7.2
|
||||
# via azure-storage-blob
|
||||
isoduration==20.11.0
|
||||
# via jsonschema
|
||||
isort==5.13.2
|
||||
@@ -492,6 +510,12 @@ more-itertools==10.5.0
|
||||
# via lm-eval
|
||||
mpmath==1.3.0
|
||||
# via sympy
|
||||
msal==1.34.0
|
||||
# via
|
||||
# azure-identity
|
||||
# msal-extensions
|
||||
msal-extensions==1.3.1
|
||||
# via azure-identity
|
||||
msgpack==1.1.0
|
||||
# via
|
||||
# librosa
|
||||
@@ -828,6 +852,8 @@ pydantic-extra-types==2.10.5
|
||||
# via mistral-common
|
||||
pygments==2.18.0
|
||||
# via rich
|
||||
pyjwt==2.11.0
|
||||
# via msal
|
||||
pyogrio==0.11.0
|
||||
# via geopandas
|
||||
pyparsing==3.2.0
|
||||
@@ -945,6 +971,7 @@ regex==2024.9.11
|
||||
# transformers
|
||||
requests==2.32.3
|
||||
# via
|
||||
# azure-core
|
||||
# buildkite-test-collector
|
||||
# datasets
|
||||
# diffusers
|
||||
@@ -957,6 +984,7 @@ requests==2.32.3
|
||||
# lightly
|
||||
# lm-eval
|
||||
# mistral-common
|
||||
# msal
|
||||
# mteb
|
||||
# pooch
|
||||
# ray
|
||||
@@ -993,11 +1021,13 @@ rsa==4.9.1
|
||||
# via google-auth
|
||||
rtree==1.4.0
|
||||
# via torchgeo
|
||||
runai-model-streamer==0.15.3
|
||||
runai-model-streamer==0.15.7
|
||||
# via -r requirements/test.in
|
||||
runai-model-streamer-gcs==0.15.3
|
||||
runai-model-streamer-azure==0.15.7
|
||||
# via runai-model-streamer
|
||||
runai-model-streamer-s3==0.15.3
|
||||
runai-model-streamer-gcs==0.15.7
|
||||
# via runai-model-streamer
|
||||
runai-model-streamer-s3==0.15.7
|
||||
# via runai-model-streamer
|
||||
s3transfer==0.10.3
|
||||
# via boto3
|
||||
@@ -1266,6 +1296,9 @@ typing-extensions==4.15.0
|
||||
# aiosignal
|
||||
# albumentations
|
||||
# alembic
|
||||
# azure-core
|
||||
# azure-identity
|
||||
# azure-storage-blob
|
||||
# chz
|
||||
# fastapi
|
||||
# grpcio
|
||||
|
||||
2
setup.py
2
setup.py
@@ -970,7 +970,7 @@ setup(
|
||||
"tensorizer": ["tensorizer==2.10.1"],
|
||||
"fastsafetensors": ["fastsafetensors >= 0.2.2"],
|
||||
"instanttensor": ["instanttensor >= 0.1.5"],
|
||||
"runai": ["runai-model-streamer[s3,gcs] >= 0.15.3"],
|
||||
"runai": ["runai-model-streamer[s3,gcs,azure] >= 0.15.7"],
|
||||
"audio": [
|
||||
"librosa",
|
||||
"scipy",
|
||||
|
||||
@@ -19,6 +19,7 @@ from vllm.transformers_utils.runai_utils import (
|
||||
def test_is_runai_obj_uri():
|
||||
assert is_runai_obj_uri("gs://some-gcs-bucket/path")
|
||||
assert is_runai_obj_uri("s3://some-s3-bucket/path")
|
||||
assert is_runai_obj_uri("az://some-azure-container/path")
|
||||
assert not is_runai_obj_uri("nfs://some-nfs-path")
|
||||
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ from vllm.transformers_utils.gguf_utils import (
|
||||
split_remote_gguf,
|
||||
)
|
||||
from vllm.transformers_utils.utils import (
|
||||
is_azure,
|
||||
is_cloud_storage,
|
||||
is_gcs,
|
||||
is_s3,
|
||||
@@ -31,9 +32,17 @@ def test_is_s3():
|
||||
assert not is_s3("nfs://nfs-fqdn.local")
|
||||
|
||||
|
||||
def test_is_azure():
|
||||
assert is_azure("az://model-container/path")
|
||||
assert not is_azure("s3://model-path/path-to-model")
|
||||
assert not is_azure("/unix/local/path")
|
||||
assert not is_azure("nfs://nfs-fqdn.local")
|
||||
|
||||
|
||||
def test_is_cloud_storage():
|
||||
assert is_cloud_storage("gs://model-path")
|
||||
assert is_cloud_storage("s3://model-path/path-to-model")
|
||||
assert is_cloud_storage("az://model-container/path")
|
||||
assert not is_cloud_storage("/unix/local/path")
|
||||
assert not is_cloud_storage("nfs://nfs-fqdn.local")
|
||||
|
||||
|
||||
@@ -1574,8 +1574,9 @@ class VllmConfig:
|
||||
"runai_streamer_sharded",
|
||||
):
|
||||
raise ValueError(
|
||||
f"To load a model from S3, 'load_format' "
|
||||
f"must be 'runai_streamer' or 'runai_streamer_sharded', "
|
||||
f"To load a model from object storage (S3/GCS/Azure), "
|
||||
f"'load_format' must be 'runai_streamer' or "
|
||||
f"'runai_streamer_sharded', "
|
||||
f"but got '{self.load_config.load_format}'. "
|
||||
f"Model: {self.model_config.model}"
|
||||
)
|
||||
|
||||
@@ -21,7 +21,7 @@ from vllm.transformers_utils.runai_utils import is_runai_obj_uri, list_safetenso
|
||||
class RunaiModelStreamerLoader(BaseModelLoader):
|
||||
"""
|
||||
Model loader that can load safetensors
|
||||
files from local FS or S3 bucket.
|
||||
files from local FS, S3, GCS, or Azure Blob Storage.
|
||||
"""
|
||||
|
||||
def __init__(self, load_config: LoadConfig):
|
||||
|
||||
@@ -13,7 +13,7 @@ from vllm.utils.import_utils import PlaceholderModule
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
SUPPORTED_SCHEMES = ["s3://", "gs://"]
|
||||
SUPPORTED_SCHEMES = ["s3://", "gs://", "az://"]
|
||||
|
||||
try:
|
||||
from runai_model_streamer import list_safetensors as runai_list_safetensors
|
||||
|
||||
@@ -23,8 +23,12 @@ def is_gcs(model_or_path: str) -> bool:
|
||||
return model_or_path.lower().startswith("gs://")
|
||||
|
||||
|
||||
def is_azure(model_or_path: str) -> bool:
|
||||
return model_or_path.lower().startswith("az://")
|
||||
|
||||
|
||||
def is_cloud_storage(model_or_path: str) -> bool:
|
||||
return is_s3(model_or_path) or is_gcs(model_or_path)
|
||||
return is_s3(model_or_path) or is_gcs(model_or_path) or is_azure(model_or_path)
|
||||
|
||||
|
||||
def without_trust_remote_code(kwargs: dict[str, Any]) -> dict[str, Any]:
|
||||
|
||||
Reference in New Issue
Block a user