[3/n][CI] Load Quantization test models with S3 (#13570)

Signed-off-by: <>
Co-authored-by: EC2 Default User <ec2-user@ip-172-31-20-117.us-west-2.compute.internal>
This commit is contained in:
Kevin H. Luu
2025-02-19 18:12:30 -08:00
committed by GitHub
parent a4c402a756
commit 473f51cfd9
2 changed files with 53 additions and 2 deletions

View File

@@ -27,8 +27,6 @@ from vllm.model_executor.layers.quantization import (QuantizationConfig,
from vllm.platforms import current_platform
from vllm.utils import PlaceholderModule
logger = init_logger(__name__)
try:
from runai_model_streamer import SafetensorsStreamer
except (ImportError, OSError):
@@ -39,6 +37,8 @@ except (ImportError, OSError):
SafetensorsStreamer = runai_model_streamer.placeholder_attr(
"SafetensorsStreamer")
logger = init_logger(__name__)
# use system-level temp directory for file locks, so that multiple users
# can share the same lock without error.
# lock files in the temp directory will be automatically deleted when the