[1/n][CI] Load models in CI from S3 instead of HF (#13205)

Signed-off-by: <>
Co-authored-by: EC2 Default User <ec2-user@ip-172-31-20-117.us-west-2.compute.internal>
This commit is contained in:
Kevin H. Luu
2025-02-18 23:34:59 -08:00
committed by GitHub
parent fd84857f64
commit d5d214ac7f
43 changed files with 225 additions and 76 deletions

View File

@@ -8,14 +8,19 @@ from vllm.config import ModelConfig, PoolerConfig
from vllm.model_executor.layers.pooler import PoolingType
from vllm.platforms import current_platform
from .conftest import MODEL_WEIGHTS_S3_BUCKET
@pytest.mark.parametrize(
("model_id", "expected_runner_type", "expected_task"),
[
("facebook/opt-125m", "generate", "generate"),
("intfloat/e5-mistral-7b-instruct", "pooling", "embed"),
("jason9693/Qwen2.5-1.5B-apeach", "pooling", "classify"),
("cross-encoder/ms-marco-MiniLM-L-6-v2", "pooling", "score"),
(f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2", "generate", "generate"),
(f"{MODEL_WEIGHTS_S3_BUCKET}/e5-mistral-7b-instruct", "pooling",
"embed"),
(f"{MODEL_WEIGHTS_S3_BUCKET}/Qwen2.5-1.5B-apeach", "pooling",
"classify"),
(f"{MODEL_WEIGHTS_S3_BUCKET}/ms-marco-MiniLM-L-6-v2", "pooling",
"score"),
("Qwen/Qwen2.5-Math-RM-72B", "pooling", "reward"),
("openai/whisper-small", "transcription", "transcription"),
],