[1/n][CI] Load models in CI from S3 instead of HF (#13205)

Signed-off-by: <>
Co-authored-by: EC2 Default User <ec2-user@ip-172-31-20-117.us-west-2.compute.internal>
This commit is contained in:
Kevin H. Luu
2025-02-18 23:34:59 -08:00
committed by GitHub
parent fd84857f64
commit d5d214ac7f
43 changed files with 225 additions and 76 deletions

View File

@@ -21,8 +21,10 @@ from vllm.lora.request import LoRARequest
from vllm.usage.usage_lib import UsageContext
from vllm.utils import FlexibleArgumentParser
# NOTE(review): this span looks like a flattened diff hunk (see the
# `@@ -21,8 +21,10 @@` header) with the +/- markers stripped. The first
# MODEL/ENGINE_ARGS pair appears to be the removed (pre-change) version and
# the second pair the added (post-change) version -- confirm against the
# real file before treating this as runnable code.

# Presumably the previous model identifier (Hugging Face style repo id).
MODEL = "google/gemma-1.1-2b-it"
# Engine arguments built from MODEL with eager execution enforced.
ENGINE_ARGS = AsyncEngineArgs(model=MODEL, enforce_eager=True)
# Rebinds MODEL to an s3:// URI for the same model name; if both assignments
# really ran in sequence, this one would win.
MODEL = "s3://vllm-ci-model-weights/gemma-1.1-2b-it"
# Rebuilds ENGINE_ARGS from the s3:// MODEL, additionally passing an explicit
# load_format.
ENGINE_ARGS = AsyncEngineArgs(model=MODEL,
# presumably selects a loader that can read the s3:// URI -- TODO confirm
load_format="runai_streamer",
enforce_eager=True)
# Exception type and value constants; how they are used is not visible in
# this chunk.
RAISED_ERROR = KeyError
RAISED_VALUE = "foo"