[1/n][CI] Load models in CI from S3 instead of HF (#13205)
Signed-off-by: <> Co-authored-by: EC2 Default User <ec2-user@ip-172-31-20-117.us-west-2.compute.internal>
This commit is contained in:
@@ -5,12 +5,17 @@ from typing import List

 import pytest

 from vllm import LLM
+from vllm.config import LoadFormat

+from ...conftest import MODEL_WEIGHTS_S3_BUCKET
 from ..openai.test_vision import TEST_IMAGE_URLS

+RUNAI_STREAMER_LOAD_FORMAT = LoadFormat.RUNAI_STREAMER
+

 def test_chat():
-    llm = LLM(model="meta-llama/Llama-3.2-1B-Instruct")
+    llm = LLM(model=f"{MODEL_WEIGHTS_S3_BUCKET}/Llama-3.2-1B-Instruct",
+              load_format=RUNAI_STREAMER_LOAD_FORMAT)

     prompt1 = "Explain the concept of entropy."
     messages = [
@@ -28,7 +33,8 @@ def test_chat():


 def test_multi_chat():
-    llm = LLM(model="meta-llama/Llama-3.2-1B-Instruct")
+    llm = LLM(model=f"{MODEL_WEIGHTS_S3_BUCKET}/Llama-3.2-1B-Instruct",
+              load_format=RUNAI_STREAMER_LOAD_FORMAT)

     prompt1 = "Explain the concept of entropy."
     prompt2 = "Explain what among us is."
@@ -65,7 +71,8 @@ def test_multi_chat():
                          [[TEST_IMAGE_URLS[0], TEST_IMAGE_URLS[1]]])
 def test_chat_multi_image(image_urls: List[str]):
     llm = LLM(
-        model="microsoft/Phi-3.5-vision-instruct",
+        model=f"{MODEL_WEIGHTS_S3_BUCKET}/Phi-3.5-vision-instruct",
+        load_format=RUNAI_STREAMER_LOAD_FORMAT,
         dtype="bfloat16",
         max_model_len=4096,
         max_num_seqs=5,
Reference in New Issue
Block a user