[1/n][CI] Load models in CI from S3 instead of HF (#13205)

Signed-off-by: <>
Co-authored-by: EC2 Default User <ec2-user@ip-172-31-20-117.us-west-2.compute.internal>
Author:    Kevin H. Luu
Date:      2025-02-18 23:34:59 -08:00
Committed: GitHub
Parent:    fd84857f64
Commit:    d5d214ac7f

43 changed files with 225 additions and 76 deletions


@@ -6,9 +6,10 @@ from typing import List
 import pytest
 
 from vllm import LLM, PoolingParams, PoolingRequestOutput
+from vllm.config import LoadFormat
 from vllm.distributed import cleanup_dist_env_and_memory
 
-MODEL_NAME = "intfloat/e5-mistral-7b-instruct"
+MODEL_NAME = "s3://vllm-ci-model-weights/e5-mistral-7b-instruct"
 
 PROMPTS = [
     "Hello, my name is",
@@ -32,6 +33,7 @@ def llm():
     # pytest caches the fixture so we use weakref.proxy to
     # enable garbage collection
     llm = LLM(model=MODEL_NAME,
+              load_format=LoadFormat.RUNAI_STREAMER,
               max_num_batched_tokens=32768,
               tensor_parallel_size=1,
               gpu_memory_utilization=0.75,
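
For context on what the new load path does: instead of resolving "intfloat/e5-mistral-7b-instruct" through the Hugging Face Hub, the fixture now streams weights directly from the CI S3 bucket via the RunAI Model Streamer. Below is a minimal standalone sketch of the same pattern, not part of this commit: the credential environment variables are standard AWS SDK conventions, the placeholder values are illustrative, and the streamer is an optional dependency (typically installed with `pip install vllm[runai]`).

    import os

    from vllm import LLM
    from vllm.config import LoadFormat

    # Standard AWS credential env vars; the streamer reads these to reach the
    # bucket. The values here are placeholders, not anything from this commit.
    os.environ.setdefault("AWS_ACCESS_KEY_ID", "<key-id>")
    os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "<secret-key>")
    os.environ.setdefault("AWS_DEFAULT_REGION", "us-west-2")

    # Point `model` at an S3 prefix holding the HF-format weights and select
    # the RunAI streamer load format, mirroring the fixture change above.
    llm = LLM(
        model="s3://vllm-ci-model-weights/e5-mistral-7b-instruct",
        load_format=LoadFormat.RUNAI_STREAMER,
        tensor_parallel_size=1,
        gpu_memory_utilization=0.75,
    )

    # e5-mistral is a pooling model, so encode() (not generate()) applies.
    outputs = llm.encode(["Hello, my name is"])

The design point of the commit follows from this: CI no longer depends on Hugging Face Hub availability or rate limits, since the weights are served from a bucket the CI controls.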