[1/n][CI] Load models in CI from S3 instead of HF (#13205)
Signed-off-by: <>
Co-authored-by: EC2 Default User <ec2-user@ip-172-31-20-117.us-west-2.compute.internal>
@@ -7,12 +7,13 @@ import weakref
 import jsonschema
 import pytest
 
+from vllm.config import LoadFormat
 from vllm.distributed import cleanup_dist_env_and_memory
 from vllm.entrypoints.llm import LLM
 from vllm.outputs import RequestOutput
 from vllm.sampling_params import GuidedDecodingParams, SamplingParams
 
-MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"
+MODEL_NAME = "s3://vllm-ci-model-weights/Qwen2.5-1.5B-Instruct"
 GUIDED_DECODING_BACKENDS = ["outlines", "lm-format-enforcer", "xgrammar"]
 
 
@@ -20,7 +21,9 @@ GUIDED_DECODING_BACKENDS = ["outlines", "lm-format-enforcer", "xgrammar"]
 def llm():
     # pytest caches the fixture so we use weakref.proxy to
     # enable garbage collection
-    llm = LLM(model=MODEL_NAME, max_model_len=1024)
+    llm = LLM(model=MODEL_NAME,
+              load_format=LoadFormat.RUNAI_STREAMER,
+              max_model_len=1024)
 
     with llm.deprecate_legacy_api():
         yield weakref.proxy(llm)
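For context, a minimal sketch of what the updated fixture configuration does when used standalone: only MODEL_NAME, load_format=LoadFormat.RUNAI_STREAMER, and max_model_len come from the diff above; the prompt, sampling settings, and generate() call are illustrative, and it is assumed that the runai-model-streamer extra is installed and S3 credentials are available in the environment.

# Illustrative sketch, not part of this commit: load the CI model weights
# from S3 via the RunAI streamer, mirroring the fixture change above.
# Assumes runai-model-streamer is installed and AWS credentials are set.
from vllm import LLM, SamplingParams
from vllm.config import LoadFormat

llm = LLM(model="s3://vllm-ci-model-weights/Qwen2.5-1.5B-Instruct",
          load_format=LoadFormat.RUNAI_STREAMER,
          max_model_len=1024)

outputs = llm.generate(["The capital of France is"],
                       SamplingParams(temperature=0.0, max_tokens=8))
print(outputs[0].outputs[0].text)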